2014-07-16 00:26:02 +02:00
|
|
|
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
|
|
|
|
|
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
|
|
|
Copyright 2014 Lennart Poettering
|
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "alloc-util.h"
|
2014-07-16 00:26:02 +02:00
|
|
|
#include "resolved-dns-server.h"
|
2015-11-24 17:03:12 +01:00
|
|
|
#include "resolved-resolv-conf.h"
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "siphash24.h"
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0]; follow-up patches
will implement the rest of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
#include "string-table.h"
|
2015-11-24 17:03:12 +01:00
|
|
|
#include "string-util.h"
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-07-28 02:32:24 +02:00
|
|
|
/* After how much time to repeat classic DNS requests */
|
|
|
|
/* Initial per-server resend timeout (see dns_server_new()), and the lower
 * bound used when the timeout is recomputed from the measured RTT. */
#define DNS_TIMEOUT_MIN_USEC (500 * USEC_PER_MSEC)
|
|
|
|
/* Upper bound of the per-server resend timeout, applied both when it is
 * derived from the RTT and when it is doubled after a lost packet. */
#define DNS_TIMEOUT_MAX_USEC (5 * USEC_PER_SEC)
|
|
|
|
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
/* The amount of time to wait before retrying with a full feature set */
|
|
|
|
/* Cap for the grace period, which is doubled each time it expires (see
 * dns_server_grace_period_expired()). */
#define DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC (6 * USEC_PER_HOUR)
|
|
|
|
/* Grace period a freshly created server starts out with (see
 * dns_server_new()). */
#define DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC (5 * USEC_PER_MINUTE)
|
|
|
|
|
|
|
|
/* The number of times we will attempt a certain feature set before degrading */
|
|
|
|
/* Threshold that n_failed_attempts is compared against in
 * dns_server_possible_features() before the feature level is lowered. */
#define DNS_SERVER_FEATURE_RETRY_ATTEMPTS 3
|
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
int dns_server_new(
|
|
|
|
Manager *m,
|
|
|
|
DnsServer **ret,
|
2014-08-01 16:04:12 +02:00
|
|
|
DnsServerType type,
|
2014-07-16 00:26:02 +02:00
|
|
|
Link *l,
|
2014-07-18 16:09:30 +02:00
|
|
|
int family,
|
2014-07-18 13:59:49 +02:00
|
|
|
const union in_addr_union *in_addr) {
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 21:39:14 +01:00
|
|
|
DnsServer *s;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
|
|
|
assert(m);
|
2014-08-01 16:04:12 +02:00
|
|
|
assert((type == DNS_SERVER_LINK) == !!l);
|
2014-07-16 00:26:02 +02:00
|
|
|
assert(in_addr);
|
|
|
|
|
2015-11-24 21:39:14 +01:00
|
|
|
if (!IN_SET(family, AF_INET, AF_INET6))
|
|
|
|
return -EAFNOSUPPORT;
|
|
|
|
|
|
|
|
if (l) {
|
|
|
|
if (l->n_dns_servers >= LINK_DNS_SERVERS_MAX)
|
|
|
|
return -E2BIG;
|
|
|
|
} else {
|
|
|
|
if (m->n_dns_servers >= MANAGER_DNS_SERVERS_MAX)
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
s = new0(DnsServer, 1);
|
|
|
|
if (!s)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-06-24 18:41:46 +02:00
|
|
|
s->n_ref = 1;
|
2015-11-24 20:50:37 +01:00
|
|
|
s->manager = m;
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
s->verified_features = _DNS_SERVER_FEATURE_LEVEL_INVALID;
|
|
|
|
s->possible_features = DNS_SERVER_FEATURE_LEVEL_BEST;
|
|
|
|
s->features_grace_period_usec = DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC;
|
resolved: announce support for large UDP packets
This is often needed for proper DNSSEC support, and even to handle AAAA records
without falling back to TCP.
If the path between the client and server is fully compliant, this should always
work, however, that is not the case, and overlarge packets will get mysteriously
lost in some cases.
For that reason, we use a similar fallback mechanism as we do for palin EDNS0,
EDNS0+DO, etc.:
The large UDP size feature is different from the other supported feature, as we
cannot simply verify that it works based on receiving a reply (as the server
will usually send us much smaller packets than what we claim to support, so
simply receiving a reply does not mean much).
For that reason, we keep track of the largest UDP packet we ever received, as this
is the smallest known good size (defaulting to the standard 512 bytes). If
announcing the default large size of 4096 fails (in the same way as the other
features), we fall back to the known good size. The same logic of retrying after a
grace-period applies.
2015-07-06 16:48:24 +02:00
|
|
|
s->received_udp_packet_max = DNS_PACKET_UNICAST_SIZE_MAX;
|
2014-08-01 16:04:12 +02:00
|
|
|
s->type = type;
|
2014-07-16 00:26:02 +02:00
|
|
|
s->family = family;
|
|
|
|
s->address = *in_addr;
|
2015-07-28 02:32:24 +02:00
|
|
|
s->resend_timeout = DNS_TIMEOUT_MIN_USEC;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 20:50:37 +01:00
|
|
|
switch (type) {
|
|
|
|
|
|
|
|
case DNS_SERVER_LINK:
|
|
|
|
s->link = l;
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, l->dns_servers, s);
|
|
|
|
l->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DNS_SERVER_SYSTEM:
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, m->dns_servers, s);
|
|
|
|
m->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DNS_SERVER_FALLBACK:
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, m->fallback_dns_servers, s);
|
|
|
|
m->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2014-08-01 16:04:12 +02:00
|
|
|
assert_not_reached("Unknown server type");
|
2015-11-24 20:50:37 +01:00
|
|
|
}
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
s->linked = true;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2014-08-01 16:04:12 +02:00
|
|
|
/* A new DNS server that isn't fallback is added and the one
|
|
|
|
* we used so far was a fallback one? Then let's try to pick
|
|
|
|
* the new one */
|
|
|
|
if (type != DNS_SERVER_FALLBACK &&
|
2014-08-12 19:32:55 +02:00
|
|
|
m->current_dns_server &&
|
|
|
|
m->current_dns_server->type == DNS_SERVER_FALLBACK)
|
|
|
|
manager_set_dns_server(m, NULL);
|
2014-08-01 16:04:12 +02:00
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
if (ret)
|
|
|
|
*ret = s;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-24 18:41:46 +02:00
|
|
|
/* Takes an additional reference on s and returns s. A NULL argument is
 * accepted and returned unchanged. */
DnsServer* dns_server_ref(DnsServer *s) {
        if (s) {
                assert(s->n_ref > 0);
                s->n_ref++;
        }

        return s;
}
|
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
/* Drops one reference from s and frees the object once the count reaches
 * zero. Always returns NULL; a NULL argument is a no-op. */
DnsServer* dns_server_unref(DnsServer *s) {
        if (!s)
                return NULL;

        assert(s->n_ref > 0);

        /* Release the memory only when the last reference is gone */
        if (--s->n_ref == 0)
                free(s);

        return NULL;
}
|
2014-08-12 12:21:10 +02:00
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
/* Removes s from the server list it is linked into, updates the matching
 * server counter, clears it as "current" server of its link and manager, and
 * finally drops one reference via dns_server_unref(). Does nothing if the
 * server is not linked. */
void dns_server_unlink(DnsServer *s) {
        assert(s);
        assert(s->manager);

        /* This removes the specified server from the linked list of
         * servers, but any server might still stay around if it has
         * refs, for example from an ongoing transaction. */

        if (!s->linked)
                return;

        switch (s->type) {

        case DNS_SERVER_LINK:
                assert(s->link);
                assert(s->link->n_dns_servers > 0);
                LIST_REMOVE(servers, s->link->dns_servers, s);
                /* Undo the increment done in dns_server_new(); without this
                 * the per-link counter only ever grows and eventually makes
                 * dns_server_new() fail with -E2BIG. */
                s->link->n_dns_servers--;
                break;

        case DNS_SERVER_SYSTEM:
                assert(s->manager->n_dns_servers > 0);
                LIST_REMOVE(servers, s->manager->dns_servers, s);
                s->manager->n_dns_servers--;
                break;

        case DNS_SERVER_FALLBACK:
                assert(s->manager->n_dns_servers > 0);
                LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
                s->manager->n_dns_servers--;
                break;
        }

        s->linked = false;

        /* Make sure neither the link nor the manager keeps pointing at a
         * server that is no longer part of any list. */
        if (s->link && s->link->current_dns_server == s)
                link_set_dns_server(s->link, NULL);

        if (s->manager->current_dns_server == s)
                manager_set_dns_server(s->manager, NULL);

        dns_server_unref(s);
}
|
|
|
|
|
2015-11-24 20:50:37 +01:00
|
|
|
/* Clears the "marked" flag of s and, if s is linked and not already the last
 * entry, moves it to the end of the list it belongs to. Does nothing if s is
 * not marked. */
void dns_server_move_back_and_unmark(DnsServer *s) {
        DnsServer *last;

        assert(s);

        if (!s->marked)
                return;

        s->marked = false;

        /* Unlinked servers and servers already at the end need no reordering */
        if (!s->linked || !s->servers_next)
                return;

        /* Find the current end of the list first, then re-insert ourselves
         * right behind it so that the order is strictly kept. */
        LIST_FIND_TAIL(servers, s, last);

        switch (s->type) {

        case DNS_SERVER_LINK:
                assert(s->link);
                LIST_REMOVE(servers, s->link->dns_servers, s);
                LIST_INSERT_AFTER(servers, s->link->dns_servers, last, s);
                break;

        case DNS_SERVER_SYSTEM:
                LIST_REMOVE(servers, s->manager->dns_servers, s);
                LIST_INSERT_AFTER(servers, s->manager->dns_servers, last, s);
                break;

        case DNS_SERVER_FALLBACK:
                LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
                LIST_INSERT_AFTER(servers, s->manager->fallback_dns_servers, last, s);
                break;

        default:
                assert_not_reached("Unknown server type");
        }
}
|
|
|
|
|
resolved: announce support for large UDP packets
This is often needed for proper DNSSEC support, and even to handle AAAA records
without falling back to TCP.
If the path between the client and server is fully compliant, this should always
work, however, that is not the case, and overlarge packets will get mysteriously
lost in some cases.
For that reason, we use a similar fallback mechanism as we do for plain EDNS0,
EDNS0+DO, etc.:
The large UDP size feature is different from the other supported feature, as we
cannot simply verify that it works based on receiving a reply (as the server
will usually send us much smaller packets than what we claim to support, so
simply receiving a reply does not mean much).
For that reason, we keep track of the largest UDP packet we ever received, as this
is the smallest known good size (defaulting to the standard 512 bytes). If
announcing the default large size of 4096 fails (in the same way as the other
features), we fall back to the known good size. The same logic of retrying after a
grace-period applies.
2015-07-06 16:48:24 +02:00
|
|
|
/* Records that a reply was received from s for a request sent at feature
 * level "features", with round-trip time rtt and packet size "size".
 *
 * Raises the verified feature level if appropriate, resets the failure
 * counter when the reply confirms the level currently being probed, and
 * updates the largest seen packet size, the maximum RTT and the resend
 * timeout. */
void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel features, usec_t rtt, size_t size) {
        DnsServerFeatureLevel confirmed;

        assert(s);

        /* Even if we successfully receive a reply to a request announcing
         * support for large packets, that does not mean we can necessarily
         * receive large packets. Hence such a reply only confirms the level
         * right below DNS_SERVER_FEATURE_LEVEL_LARGE. */
        if (features == DNS_SERVER_FEATURE_LEVEL_LARGE)
                confirmed = DNS_SERVER_FEATURE_LEVEL_LARGE - 1;
        else
                confirmed = features;

        if (s->verified_features < confirmed) {
                s->verified_features = confirmed;
                assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
        }

        if (s->possible_features == features)
                s->n_failed_attempts = 0;

        /* Remember the size of the largest UDP packet we received from a
         * server, we know that we can always announce support for packets
         * with at least this size. */
        if (size > s->received_udp_packet_max)
                s->received_udp_packet_max = size;

        /* A new worst-case RTT also determines a new resend timeout,
         * clamped to [DNS_TIMEOUT_MIN_USEC, DNS_TIMEOUT_MAX_USEC]. */
        if (rtt > s->max_rtt) {
                s->max_rtt = rtt;
                s->resend_timeout = MIN(MAX(DNS_TIMEOUT_MIN_USEC, s->max_rtt * 2), DNS_TIMEOUT_MAX_USEC);
        }
}
|
|
|
|
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
/* Records that a request sent to s at feature level "features" got no reply
 * within usec.
 *
 * Counts the failure if it concerns the feature level currently being probed,
 * and doubles the resend timeout (capped at DNS_TIMEOUT_MAX_USEC) unless the
 * current timeout is already larger than usec. */
void dns_server_packet_lost(DnsServer *s, DnsServerFeatureLevel features, usec_t usec) {
        assert(s);
        assert(s->manager);

        /* dns_server_packet_failed() stores (unsigned) -1 in the counter to
         * force a downgrade. Saturate instead of wrapping around, so that a
         * subsequent lost packet cannot reset that state back to 0. */
        if (s->possible_features == features &&
            s->n_failed_attempts < (unsigned) -1)
                s->n_failed_attempts++;

        if (s->resend_timeout > usec)
                return;

        s->resend_timeout = MIN(s->resend_timeout * 2, DNS_TIMEOUT_MAX_USEC);
}
|
|
|
|
|
2015-07-16 14:39:55 +02:00
|
|
|
/* Records a failure for a request sent to s at feature level "features". If
 * that is the level currently being probed, the failure counter is set to its
 * maximum value, which exceeds DNS_SERVER_FEATURE_RETRY_ATTEMPTS. */
void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel features) {
        assert(s);
        assert(s->manager);

        /* Failures at any other feature level are not counted */
        if (s->possible_features == features)
                s->n_failed_attempts = (unsigned) -1;
}
|
|
|
|
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
/* Returns true if the grace period that started at s->verified_usec is over.
 * In that case the grace period is doubled for the next round, capped at
 * DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC. Returns false if no verification
 * timestamp is set or the period has not elapsed yet. */
static bool dns_server_grace_period_expired(DnsServer *s) {
        usec_t now_usec;

        assert(s);
        assert(s->manager);

        /* No timestamp recorded, hence no grace period running */
        if (s->verified_usec == 0)
                return false;

        assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &now_usec) >= 0);

        if (now_usec < s->verified_usec + s->features_grace_period_usec)
                return false;

        /* Back off exponentially */
        s->features_grace_period_usec = MIN(s->features_grace_period_usec * 2, DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC);

        return true;
}
|
|
|
|
|
|
|
|
/* Determines, and returns, the feature level that should be used for the
 * next request to s. As a side effect s->possible_features may be reset to
 * the best level (grace period over), raised to the verified level, or
 * lowered by one step (too many failed attempts). */
DnsServerFeatureLevel dns_server_possible_features(DnsServer *s) {
        _cleanup_free_ char *address = NULL;

        assert(s);

        /* Currently degraded and the grace period is over? Then start again
         * from the top. Note that the expiry check is only done when we are
         * actually degraded. */
        if (s->possible_features != DNS_SERVER_FEATURE_LEVEL_BEST &&
            dns_server_grace_period_expired(s)) {

                s->possible_features = DNS_SERVER_FEATURE_LEVEL_BEST;
                s->n_failed_attempts = 0;
                s->verified_usec = 0;

                in_addr_to_string(s->family, &s->address, &address);
                log_info("Grace period over, resuming full feature set for DNS server %s", strna(address));

                return s->possible_features;
        }

        /* Never go below what has been verified to work */
        if (s->possible_features <= s->verified_features) {
                s->possible_features = s->verified_features;
                return s->possible_features;
        }

        /* Too many failures at the current level, and there is still a lower
         * level left? Then degrade by one step. */
        if (s->n_failed_attempts >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
            s->possible_features > DNS_SERVER_FEATURE_LEVEL_WORST) {

                s->possible_features--;
                s->n_failed_attempts = 0;
                s->verified_usec = 0;

                in_addr_to_string(s->family, &s->address, &address);
                log_warning("Using degraded feature set (%s) for DNS server %s",
                            dns_server_feature_level_to_string(s->possible_features), strna(address));
        }

        return s->possible_features;
}
|
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
static void dns_server_hash_func(const void *p, struct siphash *state) {
|
2014-08-12 12:21:10 +02:00
|
|
|
const DnsServer *s = p;
|
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
assert(s);
|
2014-08-12 12:21:10 +02:00
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
siphash24_compress(&s->family, sizeof(s->family), state);
|
|
|
|
siphash24_compress(&s->address, FAMILY_ADDRESS_SIZE(s->family), state);
|
2014-08-12 12:21:10 +02:00
|
|
|
}
|
|
|
|
|
2014-08-13 01:00:18 +02:00
|
|
|
static int dns_server_compare_func(const void *a, const void *b) {
|
2014-08-12 12:21:10 +02:00
|
|
|
const DnsServer *x = a, *y = b;
|
|
|
|
|
|
|
|
if (x->family < y->family)
|
|
|
|
return -1;
|
|
|
|
if (x->family > y->family)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
|
|
|
|
}
|
2014-08-13 01:00:18 +02:00
|
|
|
|
|
|
|
const struct hash_ops dns_server_hash_ops = {
|
|
|
|
.hash = dns_server_hash_func,
|
|
|
|
.compare = dns_server_compare_func
|
|
|
|
};
|
2015-11-24 16:48:13 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Unlinks every server of the list starting at first. */
void dns_server_unlink_all(DnsServer *first) {
        DnsServer *cur, *next;

        for (cur = first; cur; cur = next) {
                /* Fetch the successor before unlinking the current entry */
                next = cur->servers_next;
                dns_server_unlink(cur);
        }
}
|
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Unlinks those servers of the list starting at first that have their
 * "marked" flag set; unmarked servers are left alone. */
void dns_server_unlink_marked(DnsServer *first) {
        DnsServer *cur, *next;

        for (cur = first; cur; cur = next) {
                /* Fetch the successor before possibly unlinking the current entry */
                next = cur->servers_next;

                if (cur->marked)
                        dns_server_unlink(cur);
        }
}
|
2015-11-24 16:48:13 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Sets the "marked" flag on every server of the list starting at first. */
void dns_server_mark_all(DnsServer *first) {
        DnsServer *cur;

        for (cur = first; cur; cur = cur->servers_next)
                cur->marked = true;
}
|
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Searches the list starting at first for a server with the given address
 * family and address. Returns the first match, or NULL if there is none. */
DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr) {
        DnsServer *candidate;

        for (candidate = first; candidate; candidate = candidate->servers_next) {
                if (candidate->family != family)
                        continue;

                if (in_addr_equal(family, &candidate->address, in_addr) > 0)
                        return candidate;
        }

        return NULL;
}
|
2015-11-24 17:03:12 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Returns the head of the manager's server list for type t: the regular list
 * for DNS_SERVER_SYSTEM, the fallback list for DNS_SERVER_FALLBACK, and NULL
 * for any other type. */
DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t) {
        assert(m);

        if (t == DNS_SERVER_SYSTEM)
                return m->dns_servers;

        if (t == DNS_SERVER_FALLBACK)
                return m->fallback_dns_servers;

        return NULL;
}
|
|
|
|
|
|
|
|
/* Makes s the manager's current DNS server (s may be NULL to clear the
 * selection) and returns s. If the selection actually changes, the reference
 * held on the previous server is dropped, a reference on s is taken, and the
 * cache of the unicast scope (if there is one) is flushed. */
DnsServer *manager_set_dns_server(Manager *m, DnsServer *s) {
        assert(m);

        /* Nothing to do if this server is selected already */
        if (s == m->current_dns_server)
                return s;

        if (s) {
                _cleanup_free_ char *address = NULL;

                in_addr_to_string(s->family, &s->address, &address);
                log_info("Switching to system DNS server %s.", strna(address));
        }

        /* Swap the reference we hold on the current server */
        dns_server_unref(m->current_dns_server);
        m->current_dns_server = dns_server_ref(s);

        if (m->unicast_scope)
                dns_cache_flush(&m->unicast_scope->cache);

        return s;
}
|
|
|
|
|
|
|
|
/* Returns the manager's current DNS server, selecting one first if none is
 * set: the first regular server if there is one, otherwise the first fallback
 * server, but only if no link has DNS servers of its own. May return NULL. */
DnsServer *manager_get_dns_server(Manager *m) {
        Link *l;

        assert(m);

        /* Try to read an updated resolv.conf */
        manager_read_resolv_conf(m);

        /* If no DNS server was chosen so far, pick the first one */
        if (!m->current_dns_server)
                manager_set_dns_server(m, m->dns_servers);

        if (!m->current_dns_server) {
                bool link_has_servers = false;
                Iterator i;

                /* No DNS servers configured, let's see if there are
                 * any on any links. If not, we use the fallback
                 * servers */

                HASHMAP_FOREACH(l, m->links, i) {
                        if (!l->dns_servers)
                                continue;

                        link_has_servers = true;
                        break;
                }

                if (!link_has_servers)
                        manager_set_dns_server(m, m->fallback_dns_servers);
        }

        return m->current_dns_server;
}
|
|
|
|
|
|
|
|
/* Advances the manager's current DNS server to the next entry of its list,
 * wrapping around to the head of the list (fallback or regular, depending
 * on the current server's type) when there is no next entry. */
void manager_next_dns_server(Manager *m) {
        DnsServer *current;

        assert(m);

        current = m->current_dns_server;

        /* Nothing selected yet: the next manager_get_dns_server() call
         * will pick a server, so there is nothing to advance from. */
        if (!current)
                return;

        /* Only follow the list pointer while the server is still linked
         * into its list. */
        if (current->linked && current->servers_next) {
                manager_set_dns_server(m, current->servers_next);
                return;
        }

        /* No successor available, restart from the beginning of the list
         * the current server belongs to. */
        manager_set_dns_server(m,
                               current->type == DNS_SERVER_FALLBACK
                               ? m->fallback_dns_servers
                               : m->dns_servers);
}
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0]; follow-up patches
will implement the rest of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
|
|
|
|
static const char* const dns_server_feature_level_table[_DNS_SERVER_FEATURE_LEVEL_MAX] = {
|
|
|
|
[DNS_SERVER_FEATURE_LEVEL_TCP] = "TCP",
|
|
|
|
[DNS_SERVER_FEATURE_LEVEL_UDP] = "UDP",
|
2015-06-23 23:06:09 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_EDNS0] = "UDP+EDNS0",
|
2015-06-24 15:08:40 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_DO] = "UDP+EDNS0+DO",
|
resolved: announce support for large UDP packets
This is often needed for proper DNSSEC support, and even to handle AAAA records
without falling back to TCP.
If the path between the client and server is fully compliant, this should always
work, however, that is not the case, and overlarge packets will get mysteriously
lost in some cases.
For that reason, we use a similar fallback mechanism as we do for palin EDNS0,
EDNS0+DO, etc.:
The large UDP size feature is different from the other supported feature, as we
cannot simply verify that it works based on receiving a reply (as the server
will usually send us much smaller packets than what we claim to support, so
simply receiving a reply does not mean much).
For that reason, we keep track of the largest UDP packet we ever received, as this
is the smallest known good size (defaulting to the standard 512 bytes). If
announcing the default large size of 4096 fails (in the same way as the other
features), we fall back to the known good size. The same logic of retrying after a
grace-period applies.
2015-07-06 16:48:24 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_LARGE] = "UDP+EDNS0+DO+LARGE",
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
};
|
|
|
|
/* NOTE(review): presumably expands to the string <-> DnsServerFeatureLevel
 * conversion helpers backed by dns_server_feature_level_table; confirm
 * against the macro's definition. */
DEFINE_STRING_TABLE_LOOKUP(dns_server_feature_level, DnsServerFeatureLevel);
|