resolved: rework server feature level logic

This changes the DnsServer logic to count UDP and TCP failures separately. This is useful so that we don't end
up downgrading the feature level from one UDP level to a lower UDP level just because a TCP connection that we made in
response to a TC (truncated) reply failed.

This also adds accounting of truncated packets. If we detect incoming truncated packets, and count too many failed TCP
connections (which is the normal fallback if we get a truncated UDP packet), we downgrade the feature level, given that
the responses at the current level don't get through, and we somehow need to make sure they become smaller, which they
will do if we don't request DNSSEC or EDNS support.

This makes resolved work much better with crappy DNS servers that do not implement TCP and support only limited UDP
packet sizes, but otherwise support DNSSEC RRs. They end up choking on the generally larger DNSSEC RRs and there's no
way to retrieve the full data.
This commit is contained in:
Lennart Poettering 2016-01-08 18:50:41 +01:00
parent ef9fb66c0b
commit 6bb2c08597
3 changed files with 121 additions and 51 deletions

View File

@ -224,31 +224,48 @@ void dns_server_move_back_and_unmark(DnsServer *s) {
}
}
void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel level, usec_t rtt, size_t size) {
static void dns_server_verified(DnsServer *s, DnsServerFeatureLevel level) {
assert(s);
if (level == DNS_SERVER_FEATURE_LEVEL_LARGE) {
/* Even if we successfully receive a reply to a
request announcing support for large packets, that
does not mean we can necessarily receive large
packets. */
if (s->verified_feature_level > level)
return;
if (s->verified_feature_level < DNS_SERVER_FEATURE_LEVEL_LARGE - 1) {
s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_LARGE - 1;
assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
}
} else if (s->verified_feature_level < level) {
if (s->verified_feature_level != level) {
log_debug("Verified feature level %s.", dns_server_feature_level_to_string(level));
s->verified_feature_level = level;
assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
}
if (s->possible_feature_level == level)
s->n_failed_attempts = 0;
assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
}
void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t rtt, size_t size) {
assert(s);
if (protocol == IPPROTO_UDP) {
if (s->possible_feature_level == level)
s->n_failed_udp = 0;
if (level == DNS_SERVER_FEATURE_LEVEL_LARGE)
/* Even if we successfully receive a reply to a request announcing support for large packets,
that does not mean we can necessarily receive large packets. */
dns_server_verified(s, DNS_SERVER_FEATURE_LEVEL_LARGE - 1);
else
/* A successful UDP reply verifies the UDP, EDNS0 and DO levels */
dns_server_verified(s, level);
} else if (protocol == IPPROTO_TCP) {
if (s->possible_feature_level == level)
s->n_failed_tcp = 0;
/* Successful TCP connections are only useful to verify the TCP feature level. */
dns_server_verified(s, DNS_SERVER_FEATURE_LEVEL_TCP);
}
/* Remember the size of the largest UDP packet we received from a server,
we know that we can always announce support for packets with at least
this size. */
if (s->received_udp_packet_max < size)
if (protocol == IPPROTO_UDP && s->received_udp_packet_max < size)
s->received_udp_packet_max = size;
if (s->max_rtt < rtt) {
@ -257,12 +274,16 @@ void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel level, usec_
}
}
void dns_server_packet_lost(DnsServer *s, DnsServerFeatureLevel level, usec_t usec) {
void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t usec) {
assert(s);
assert(s->manager);
if (s->possible_feature_level == level)
s->n_failed_attempts ++;
if (s->possible_feature_level == level) {
if (protocol == IPPROTO_UDP)
s->n_failed_udp ++;
else if (protocol == IPPROTO_TCP)
s->n_failed_tcp ++;
}
if (s->resend_timeout > usec)
return;
@ -274,18 +295,24 @@ void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel level) {
assert(s);
assert(s->manager);
/* Invoked whenever we get a FORMERR, SERVFAIL or NOTIMP rcode from a server. */
if (s->possible_feature_level != level)
return;
/* Invoked whenever we get a FORMERR, SERVFAIL or NOTIMP rcode from a server. This is an immediate trigger for
* us to go one feature level down. Except when we are already at TCP or UDP level, in which case there's no
* point in changing, under the assumption that packet failures are caused by packet contents, not by used
* transport. */
s->packet_failed = true;
}
if (s->possible_feature_level <= DNS_SERVER_FEATURE_LEVEL_UDP)
void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level) {
assert(s);
assert(s->manager);
/* Invoked whenever we get a packet with TC bit set. */
if (s->possible_feature_level != level)
return;
s->n_failed_attempts = (unsigned) -1;
s->packet_truncated = true;
}
void dns_server_packet_rrsig_missing(DnsServer *s) {
@ -326,35 +353,71 @@ DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) {
_cleanup_free_ char *ip = NULL;
s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST;
s->n_failed_attempts = 0;
s->n_failed_udp = 0;
s->n_failed_tcp = 0;
s->packet_failed = false;
s->packet_truncated = false;
s->verified_usec = 0;
s->rrsig_missing = false;
in_addr_to_string(s->family, &s->address, &ip);
log_info("Grace period over, resuming full feature set for DNS server %s", strna(ip));
log_info("Grace period over, resuming full feature set (%s) for DNS server %s",
dns_server_feature_level_to_string(s->possible_feature_level), strna(ip));
} else if (s->possible_feature_level <= s->verified_feature_level)
s->possible_feature_level = s->verified_feature_level;
else if (s->n_failed_attempts >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS) {
_cleanup_free_ char *ip = NULL;
else {
DnsServerFeatureLevel p = s->possible_feature_level;
/* Switch one feature level down. Except when we are at TCP already, in which case we try UDP
* again. Thus, if a DNS server is not responding we'll keep toggling between UDP and TCP until it
* responds on one of them. Note that we generally prefer UDP over TCP (which is why it is at a higher
* feature level), but many DNS servers lack TCP support. */
if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
s->possible_feature_level == DNS_SERVER_FEATURE_LEVEL_TCP)
if (s->possible_feature_level == DNS_SERVER_FEATURE_LEVEL_TCP)
/* We are at the TCP (lowest) level, and we tried a couple of TCP connections, and it didn't
* work. Upgrade back to UDP again. */
s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;
else {
assert(s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_WORST);
s->possible_feature_level --;
else if ((s->n_failed_udp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_UDP) ||
(s->packet_failed &&
s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP) ||
(s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
s->packet_truncated &&
s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP))
/* Downgrade the feature one level, maybe things will work better then. We do this under any of
* three conditions:
*
* 1. We lost too many UDP packets in a row, and are on a feature level of UDP or higher. If
* the packets are lost, maybe the server cannot parse them, hence downgrading sounds like a
* good idea. We might downgrade all the way down to TCP this way.
*
* 2. We got a failure packet, and are at a feature level above UDP. Note that in this case we
* downgrade no further than UDP, under the assumption that a failure packet indicates an
* incompatible packet contents, but not a problem with the transport.
*
* 3. We got too many TCP connection failures in a row, we had at least one truncated packet,
* and are on a feature level above UDP. By downgrading things and getting rid of DNSSEC or
* EDNS0 data we hope to make the packet smaller, so that it still works via UDP given that
* TCP appears not to be a fallback. Note that if we are already at the lowest UDP level, we
* don't go further down, since that's TCP, and TCP failed too often after all.
*/
s->possible_feature_level--;
if (p != s->possible_feature_level) {
_cleanup_free_ char *ip = NULL;
/* We changed the feature level, reset the counting */
s->n_failed_udp = 0;
s->n_failed_tcp = 0;
s->packet_failed = false;
s->packet_truncated = false;
s->verified_usec = 0;
in_addr_to_string(s->family, &s->address, &ip);
log_warning("Using degraded feature set (%s) for DNS server %s",
dns_server_feature_level_to_string(s->possible_feature_level), strna(ip));
}
s->n_failed_attempts = 0;
s->verified_usec = 0;
in_addr_to_string(s->family, &s->address, &ip);
log_warning("Using degraded feature set (%s) for DNS server %s",
dns_server_feature_level_to_string(s->possible_feature_level), strna(ip));
}
return s->possible_feature_level;

View File

@ -67,7 +67,10 @@ struct DnsServer {
DnsServerFeatureLevel verified_feature_level;
DnsServerFeatureLevel possible_feature_level;
size_t received_udp_packet_max;
unsigned n_failed_attempts;
unsigned n_failed_udp;
unsigned n_failed_tcp;
bool packet_failed:1;
bool packet_truncated:1;
usec_t verified_usec;
usec_t features_grace_period_usec;
@ -99,9 +102,10 @@ DnsServer* dns_server_unref(DnsServer *s);
void dns_server_unlink(DnsServer *s);
void dns_server_move_back_and_unmark(DnsServer *s);
void dns_server_packet_received(DnsServer *s, DnsServerFeatureLevel level, usec_t rtt, size_t size);
void dns_server_packet_lost(DnsServer *s, DnsServerFeatureLevel level, usec_t usec);
void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t rtt, size_t size);
void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t usec);
void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel level);
void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level);
void dns_server_packet_rrsig_missing(DnsServer *s);
DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s);

View File

@ -370,7 +370,7 @@ static int on_stream_complete(DnsStream *s, int error) {
log_debug_errno(error, "Connection failure for DNS TCP stream, treating as lost packet: %m");
assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
dns_server_packet_lost(t->server, t->current_features, usec - t->start_usec);
dns_server_packet_lost(t->server, IPPROTO_TCP, t->current_features, usec - t->start_usec);
dns_transaction_retry(t);
return 0;
@ -670,8 +670,10 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
dns_server_packet_failed(t->server, t->current_features);
dns_transaction_retry(t);
return;
} else
dns_server_packet_received(t->server, t->current_features, ts - t->start_usec, p->size);
} else if (DNS_PACKET_TC(p))
dns_server_packet_truncated(t->server, t->current_features);
else
dns_server_packet_received(t->server, p->ipproto, t->current_features, ts - t->start_usec, p->size);
break;
@ -797,7 +799,7 @@ static int on_dns_packet(sd_event_source *s, int fd, uint32_t revents, void *use
log_debug_errno(r, "Connection failure for DNS UDP packet, treating as lost packet: %m");
assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
dns_server_packet_lost(t->server, t->current_features, usec - t->start_usec);
dns_server_packet_lost(t->server, IPPROTO_UDP, t->current_features, usec - t->start_usec);
dns_transaction_retry(t);
return 0;
@ -889,7 +891,7 @@ static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdat
case DNS_PROTOCOL_DNS:
assert(t->server);
dns_server_packet_lost(t->server, t->current_features, usec - t->start_usec);
dns_server_packet_lost(t->server, t->stream ? IPPROTO_TCP : IPPROTO_UDP, t->current_features, usec - t->start_usec);
break;
case DNS_PROTOCOL_LLMNR:
@ -2375,6 +2377,7 @@ int dns_transaction_validate_dnssec(DnsTransaction *t) {
(t->server && t->server->rrsig_missing)) {
/* The server does not support DNSSEC, or doesn't augment responses with RRSIGs. */
t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER;
log_debug("Cannot validate reponse, server lacks DNSSEC support.");
return 0;
}