From be808ea083fa07271116b4519c3c27fd20c5f077 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 6 Jul 2015 08:15:25 +0200 Subject: [PATCH] resolved: fallback to TCP if UDP fails This is inspired by the logic in BIND [0]; follow-up patches will implement the rest of that scheme. If we get a server error back, or if after several attempts we don't get a reply at all, we switch from UDP to TCP for the given server for the current and all subsequent requests. However, if we ever successfully received a reply over UDP, we never fall back to TCP, and once a grace-period has passed, we try to upgrade again to using UDP. The grace-period starts off at five minutes after the current feature level was verified and then grows exponentially to six hours. This is to mitigate problems due to temporary lack of network connectivity, but at the same time avoid flooding the network with retries when the attempted feature level genuinely does not work. Note that UDP is likely much more commonly supported than TCP, but depending on the path between the client and the server, we may have more luck with TCP in case something is wrong. We really do prefer UDP though, as that is much more lightweight; that is why TCP is only the last resort. 
[0]: --- src/resolve/resolved-dns-scope.c | 5 ++ src/resolve/resolved-dns-server.c | 90 ++++++++++++++++++++++++-- src/resolve/resolved-dns-server.h | 26 +++++++- src/resolve/resolved-dns-transaction.c | 34 +++++++--- src/resolve/resolved-dns-transaction.h | 3 + 5 files changed, 140 insertions(+), 18 deletions(-) diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index fc4ae57ce0..a8c0ae1569 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -243,6 +243,11 @@ static int dns_scope_socket(DnsScope *s, int type, int family, const union in_ad if (!srv) return -ESRCH; + srv->possible_features = dns_server_possible_features(srv); + + if (type == SOCK_DGRAM && srv->possible_features < DNS_SERVER_FEATURE_LEVEL_UDP) + return -EAGAIN; + sa.sa.sa_family = srv->family; if (srv->family == AF_INET) { sa.in.sin_port = htobe16(port); diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 0ebd22fe22..5fc7c3187f 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -23,12 +23,20 @@ #include "resolved-dns-server.h" #include "resolved-resolv-conf.h" #include "siphash24.h" +#include "string-table.h" #include "string-util.h" /* After how much time to repeat classic DNS requests */ #define DNS_TIMEOUT_MIN_USEC (500 * USEC_PER_MSEC) #define DNS_TIMEOUT_MAX_USEC (5 * USEC_PER_SEC) +/* The amount of time to wait before retrying with a full feature set */ +#define DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC (6 * USEC_PER_HOUR) +#define DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC (5 * USEC_PER_MINUTE) + +/* The number of times we will attempt a certain feature set before degrading */ +#define DNS_SERVER_FEATURE_RETRY_ATTEMPTS 3 + int dns_server_new( Manager *m, DnsServer **ret, @@ -60,6 +68,9 @@ int dns_server_new( s->n_ref = 1; s->manager = m; + s->verified_features = _DNS_SERVER_FEATURE_LEVEL_INVALID; + s->possible_features = DNS_SERVER_FEATURE_LEVEL_BEST; + 
s->features_grace_period_usec = DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC; s->type = type; s->family = family; s->address = *in_addr; @@ -212,18 +223,29 @@ void dns_server_move_back_and_unmark(DnsServer *s) { } } -void dns_server_packet_received(DnsServer *s, usec_t rtt) { +void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel features, usec_t rtt) { assert(s); - if (rtt <= s->max_rtt) - return; + if (s->verified_features < features) { + s->verified_features = features; + assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0); + } - s->max_rtt = rtt; - s->resend_timeout = MIN(MAX(DNS_TIMEOUT_MIN_USEC, s->max_rtt * 2), DNS_TIMEOUT_MAX_USEC); + if (s->possible_features == features) + s->n_failed_attempts = 0; + + if (s->max_rtt < rtt) { + s->max_rtt = rtt; + s->resend_timeout = MIN(MAX(DNS_TIMEOUT_MIN_USEC, s->max_rtt * 2), DNS_TIMEOUT_MAX_USEC); + } } -void dns_server_packet_lost(DnsServer *s, usec_t usec) { +void dns_server_packet_lost(DnsServer *s, DnsServerFeatureLevel features, usec_t usec) { assert(s); + assert(s->manager); + + if (s->possible_features == features) + s->n_failed_attempts ++; if (s->resend_timeout > usec) return; @@ -231,6 +253,56 @@ void dns_server_packet_lost(DnsServer *s, usec_t usec) { s->resend_timeout = MIN(s->resend_timeout * 2, DNS_TIMEOUT_MAX_USEC); } +static bool dns_server_grace_period_expired(DnsServer *s) { + usec_t ts; + + assert(s); + assert(s->manager); + + if (s->verified_usec == 0) + return false; + + assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &ts) >= 0); + + if (s->verified_usec + s->features_grace_period_usec > ts) + return false; + + s->features_grace_period_usec = MIN(s->features_grace_period_usec * 2, DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC); + + return true; +} + +DnsServerFeatureLevel dns_server_possible_features(DnsServer *s) { + assert(s); + + if (s->possible_features != DNS_SERVER_FEATURE_LEVEL_BEST && + 
dns_server_grace_period_expired(s)) { + _cleanup_free_ char *ip = NULL; + + s->possible_features = DNS_SERVER_FEATURE_LEVEL_BEST; + s->n_failed_attempts = 0; + s->verified_usec = 0; + + in_addr_to_string(s->family, &s->address, &ip); + log_info("Grace period over, resuming full feature set for DNS server %s", strna(ip)); + } else if (s->possible_features <= s->verified_features) + s->possible_features = s->verified_features; + else if (s->n_failed_attempts >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS && + s->possible_features > DNS_SERVER_FEATURE_LEVEL_WORST) { + _cleanup_free_ char *ip = NULL; + + s->possible_features --; + s->n_failed_attempts = 0; + s->verified_usec = 0; + + in_addr_to_string(s->family, &s->address, &ip); + log_warning("Using degraded feature set (%s) for DNS server %s", + dns_server_feature_level_to_string(s->possible_features), strna(ip)); + } + + return s->possible_features; +} + static void dns_server_hash_func(const void *p, struct siphash *state) { const DnsServer *s = p; @@ -392,3 +464,9 @@ void manager_next_dns_server(Manager *m) { else manager_set_dns_server(m, m->dns_servers); } + +static const char* const dns_server_feature_level_table[_DNS_SERVER_FEATURE_LEVEL_MAX] = { + [DNS_SERVER_FEATURE_LEVEL_TCP] = "TCP", + [DNS_SERVER_FEATURE_LEVEL_UDP] = "UDP", +}; +DEFINE_STRING_TABLE_LOOKUP(dns_server_feature_level, DnsServerFeatureLevel); diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h index 3a78d4a3b5..f82b14e8fb 100644 --- a/src/resolve/resolved-dns-server.h +++ b/src/resolve/resolved-dns-server.h @@ -31,8 +31,21 @@ typedef enum DnsServerType { DNS_SERVER_LINK, } DnsServerType; -#include "resolved-manager.h" +typedef enum DnsServerFeatureLevel { + DNS_SERVER_FEATURE_LEVEL_TCP, + DNS_SERVER_FEATURE_LEVEL_UDP, + _DNS_SERVER_FEATURE_LEVEL_MAX, + _DNS_SERVER_FEATURE_LEVEL_INVALID = -1 +} DnsServerFeatureLevel; + +#define DNS_SERVER_FEATURE_LEVEL_WORST 0 +#define DNS_SERVER_FEATURE_LEVEL_BEST 
(_DNS_SERVER_FEATURE_LEVEL_MAX - 1) + +const char* dns_server_feature_level_to_string(int i) _const_; +int dns_server_feature_level_from_string(const char *s) _pure_; + #include "resolved-link.h" +#include "resolved-manager.h" struct DnsServer { Manager *manager; @@ -49,6 +62,11 @@ struct DnsServer { usec_t max_rtt; bool marked:1; + DnsServerFeatureLevel verified_features; + DnsServerFeatureLevel possible_features; + unsigned n_failed_attempts; + usec_t verified_usec; + usec_t features_grace_period_usec; /* If linked is set, then this server appears in the servers linked list */ bool linked:1; @@ -69,8 +87,8 @@ DnsServer* dns_server_unref(DnsServer *s); void dns_server_unlink(DnsServer *s); void dns_server_move_back_and_unmark(DnsServer *s); -void dns_server_packet_received(DnsServer *s, usec_t rtt); -void dns_server_packet_lost(DnsServer *s, usec_t usec); +void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel features, usec_t rtt); +void dns_server_packet_lost(DnsServer *s, DnsServerFeatureLevel features, usec_t usec); DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr); @@ -86,4 +104,6 @@ void manager_next_dns_server(Manager *m); DEFINE_TRIVIAL_CLEANUP_FUNC(DnsServer*, dns_server_unref); +DnsServerFeatureLevel dns_server_possible_features(DnsServer *s); + extern const struct hash_ops dns_server_hash_ops; diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 8c4f23a4da..0e09a339aa 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -418,7 +418,7 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { case DNS_PROTOCOL_DNS: assert(t->server); - dns_server_packet_received(t->server, ts - t->start_usec); + dns_server_packet_received(t->server, t->current_features, ts - t->start_usec); break; case DNS_PROTOCOL_LLMNR: @@ -534,6 +534,9 @@ static int dns_transaction_emit(DnsTransaction *t) { if (r < 0) return 
r; + if (t->server) + t->current_features = t->server->possible_features; + return 0; } @@ -544,15 +547,26 @@ static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdat assert(s); assert(t); - /* Timeout reached? Try again, with a new server */ - dns_transaction_next_dns_server(t); + /* Timeout reached? Increase the timeout for the server used */ + switch (t->scope->protocol) { + case DNS_PROTOCOL_DNS: + assert(t->server); - /* ... and possibly increased timeout */ - if (t->server) - dns_server_packet_lost(t->server, usec - t->start_usec); - else + dns_server_packet_lost(t->server, t->current_features, usec - t->start_usec); + + break; + case DNS_PROTOCOL_LLMNR: + case DNS_PROTOCOL_MDNS: dns_scope_packet_lost(t->scope, usec - t->start_usec); + break; + default: + assert_not_reached("Invalid DNS protocol."); + } + + /* ...and try again with a new server */ + dns_transaction_next_dns_server(t); + r = dns_transaction_go(t); if (r < 0) dns_transaction_complete(t, DNS_TRANSACTION_RESOURCES); @@ -734,11 +748,13 @@ int dns_transaction_go(DnsTransaction *t) { * always be made via TCP on LLMNR */ r = dns_transaction_open_tcp(t); } else { - /* Try via UDP, and if that fails due to large size try via TCP */ + /* Try via UDP, and if that fails due to large size or lack of + * support try via TCP */ r = dns_transaction_emit(t); - if (r == -EMSGSIZE) + if (r == -EMSGSIZE || r == -EAGAIN) r = dns_transaction_open_tcp(t); } + if (r == -ESRCH) { /* No servers to send this to? 
*/ dns_transaction_complete(t, DNS_TRANSACTION_NO_SERVERS); diff --git a/src/resolve/resolved-dns-transaction.h b/src/resolve/resolved-dns-transaction.h index ee80dcf5a9..5778913cc8 100644 --- a/src/resolve/resolved-dns-transaction.h +++ b/src/resolve/resolved-dns-transaction.h @@ -79,6 +79,9 @@ struct DnsTransaction { /* The active server */ DnsServer *server; + /* the features of the DNS server at time of transaction start */ + DnsServerFeatureLevel current_features; + /* TCP connection logic, if we need it */ DnsStream *stream;