From 201d99584ed7af8078bb243ce2587e5455074713 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Feb 2017 18:28:23 +0100 Subject: [PATCH] resolved: cache SERVFAIL responses for 30s Some domains (such as us.ynuf.alipay.com) almost appear as if they actively want to sabotage our DNSSEC work. Specifically, they unconditionally return SERVFAIL on SOA lookups and always only after a 1s delay (at least). This is pretty bad for our validation logic, as we use SOA lookups to distuingish zones from non-terminal names. Moreover, SERVFAIL is an error that is typically returned if we send requests a server doesn't grok, and thus is reason for us to downgrade our protocol and try again. In case of these zones this means we'll accept the SERVFAIL response only after a full iterative downgrade to our lowest feature level: TCP. In combination with the 1s delays this has the effect of making us hit our transaction timeout way to easily. As first attempt to improve the situation: let's start caching SERVFAIL responses in our cache, after the full downgrade for a short period of time. Conceptually this is exposed as "weird rcode" caching, but for now we only consider SERVFAIL a "weird rcode" worthy of caching. Later on we might want to add more. --- src/resolve/resolved-dns-cache.c | 123 +++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 30 deletions(-) diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c index c43a7865dc..ee93147015 100644 --- a/src/resolve/resolved-dns-cache.c +++ b/src/resolve/resolved-dns-cache.c @@ -34,6 +34,10 @@ /* We never keep any item longer than 2h in our cache */ #define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR) +/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for + * now) */ +#define CACHE_TTL_STRANGE_RCODE_USEC (30 * USEC_PER_SEC) + typedef enum DnsCacheItemType DnsCacheItemType; typedef struct DnsCacheItem DnsCacheItem; @@ -41,12 +45,14 @@ enum DnsCacheItemType { DNS_CACHE_POSITIVE, DNS_CACHE_NODATA, DNS_CACHE_NXDOMAIN, + DNS_CACHE_RCODE, /* "strange" RCODE (effective only SERVFAIL for now) */ }; struct DnsCacheItem { DnsCacheItemType type; DnsResourceKey *key; DnsResourceRecord *rr; + int rcode; usec_t until; bool authenticated:1; @@ -60,6 +66,27 @@ struct DnsCacheItem { LIST_FIELDS(DnsCacheItem, by_key); }; +static const char *dns_cache_item_type_to_string(DnsCacheItem *item) { + assert(item); + + switch (item->type) { + + case DNS_CACHE_POSITIVE: + return "POSITIVE"; + + case DNS_CACHE_NODATA: + return "NODATA"; + + case DNS_CACHE_NXDOMAIN: + return "NXDOMAIN"; + + case DNS_CACHE_RCODE: + return dns_rcode_to_string(item->rcode); + } + + return NULL; +} + static void dns_cache_item_free(DnsCacheItem *i) { if (!i) return; @@ -484,7 +511,6 @@ static int dns_cache_put_negative( assert(c); assert(key); - assert(soa); assert(owner_address); /* Never cache pseudo RR keys. DNS_TYPE_ANY is particularly @@ -495,13 +521,17 @@ static int dns_cache_put_negative( if (dns_type_is_pseudo(key->type)) return 0; - if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) { - log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s", - dns_resource_key_to_string(key, key_str, sizeof key_str)); - return 0; - } + if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) { + if (!soa) + return 0; - if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) + /* For negative replies, check if we have a TTL of a SOA */ + if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) { + log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s", + dns_resource_key_to_string(key, key_str, sizeof key_str)); + return 0; + } + } else if (rcode != DNS_RCODE_SERVFAIL) return 0; r = dns_cache_init(c); @@ -514,12 +544,17 @@ static int dns_cache_put_negative( if (!i) return -ENOMEM; - i->type = rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA : DNS_CACHE_NXDOMAIN; - i->until = calculate_until(soa, nsec_ttl, timestamp, true); + i->type = + rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA : + rcode == DNS_RCODE_NXDOMAIN ? DNS_CACHE_NXDOMAIN : DNS_CACHE_RCODE; + i->until = + i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC : + calculate_until(soa, nsec_ttl, timestamp, true); i->authenticated = authenticated; i->owner_family = owner_family; i->owner_address = *owner_address; i->prioq_idx = PRIOQ_IDX_NULL; + i->rcode = rcode; if (i->type == DNS_CACHE_NXDOMAIN) { /* NXDOMAIN entries should apply equally to all types, so we use ANY as @@ -543,7 +578,7 @@ static int dns_cache_put_negative( return r; log_debug("Added %s cache entry for %s "USEC_FMT"s", - i->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN", + dns_cache_item_type_to_string(i), dns_resource_key_to_string(i->key, key_str, sizeof key_str), (i->until - timestamp) / USEC_PER_SEC); @@ -615,6 +650,7 @@ int dns_cache_put( const union in_addr_union *owner_address) { DnsResourceRecord *soa = NULL, *rr; + bool weird_rcode = false; DnsAnswerFlags flags; unsigned cache_keys; int r, ifindex; @@ -624,18 +660,28 @@ int dns_cache_put( dns_cache_remove_previous(c, key, answer); - /* We only care for positive replies and NXDOMAINs, on all - * other replies we will simply flush the respective entries, - * and that's it */ - if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) - return 0; + /* We only care for positive replies and NXDOMAINs, on all other replies we will simply flush the respective + * entries, and that's it. (Well, with one further exception: since some DNS zones (akamai!) return SERVFAIL + * consistently for some lookups, and forwarders tend to propagate that we'll cache that too, but only for a + * short time.) */ - if (dns_answer_size(answer) <= 0) { - char key_str[DNS_RESOURCE_KEY_STRING_MAX]; + if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) { - log_debug("Not caching negative entry without a SOA record: %s", - dns_resource_key_to_string(key, key_str, sizeof key_str)); - return 0; + if (dns_answer_size(answer) <= 0) { + char key_str[DNS_RESOURCE_KEY_STRING_MAX]; + + log_debug("Not caching negative entry without a SOA record: %s", + dns_resource_key_to_string(key, key_str, sizeof key_str)); + return 0; + } + + } else { + /* Only cache SERVFAIL as "weird" rcode for now. We can add more later, should that turn out to be + * beneficial. */ + if (rcode != DNS_RCODE_SERVFAIL) + return 0; + + weird_rcode = true; } cache_keys = dns_answer_size(answer); @@ -690,19 +736,20 @@ int dns_cache_put( if (r > 0) return 0; - /* See https://tools.ietf.org/html/rfc2308, which say that a - * matching SOA record in the packet is used to enable - * negative caching. */ + /* See https://tools.ietf.org/html/rfc2308, which say that a matching SOA record in the packet is used to + * enable negative caching. We apply one exception though: if we are about to cache a weird rcode we do so + * regardless of a SOA. */ r = dns_answer_find_soa(answer, key, &soa, &flags); if (r < 0) goto fail; - if (r == 0) - return 0; - - /* Refuse using the SOA data if it is unsigned, but the key is - * signed */ - if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0) + if (r == 0 && !weird_rcode) return 0; + if (r > 0) { + /* Refuse using the SOA data if it is unsigned, but the key is + * signed */ + if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0) + return 0; + } r = dns_cache_put_negative( c, @@ -799,6 +846,7 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod DnsCacheItem *j, *first, *nsec = NULL; bool have_authenticated = false, have_non_authenticated = false; usec_t current; + int found_rcode = -1; assert(c); assert(key); @@ -842,6 +890,8 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod n++; } else if (j->type == DNS_CACHE_NXDOMAIN) nxdomain = true; + else if (j->type == DNS_CACHE_RCODE) + found_rcode = j->rcode; if (j->authenticated) have_authenticated = true; @@ -849,6 +899,19 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod have_non_authenticated = true; } + if (found_rcode >= 0) { + log_debug("RCODE %s cache hit for %s", + dns_rcode_to_string(found_rcode), + dns_resource_key_to_string(key, key_str, sizeof(key_str))); + + *ret = NULL; + *rcode = found_rcode; + *authenticated = false; + + c->n_hit++; + return 1; + } + if (nsec && !IN_SET(key->type, DNS_TYPE_NSEC, DNS_TYPE_DS)) { /* Note that we won't derive information for DS RRs from an NSEC, because we only cache NSEC RRs from * the lower-zone of a zone cut, but the DS RRs are on the upper zone. */ @@ -1042,7 +1105,7 @@ void dns_cache_dump(DnsCache *cache, FILE *f) { fputs(dns_resource_key_to_string(j->key, key_str, sizeof key_str), f); fputs(" -- ", f); - fputs(j->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN", f); + fputs(dns_cache_item_type_to_string(j), f); fputc('\n', f); } }