resolved: cache SERVFAIL responses for 30s

Some domains (such as us.ynuf.alipay.com) almost appear as if they actively
want to sabotage our DNSSEC work. Specifically, they unconditionally
return SERVFAIL on SOA lookups and always only after a 1s delay (at
least). This is pretty bad for our validation logic, as we use SOA
lookups to distuingish zones from non-terminal names. Moreover, SERVFAIL
is an error that is typically returned if we send requests a server
doesn't grok, and thus is reason for us to downgrade our protocol and
try again. In case of these zones this means we'll accept the SERVFAIL
response only after a full iterative downgrade to our lowest feature
level: TCP. In combination with the 1s delays this has the effect of
making us hit our transaction timeout way to easily.

As first attempt to improve the situation: let's start caching SERVFAIL
responses in our cache, after the full downgrade for a short period of
time.

Conceptually this is exposed as "weird rcode" caching, but for now we
only consider SERVFAIL a "weird rcode" worthy of caching. Later on we
might want to add more.
This commit is contained in:
Lennart Poettering 2017-02-15 18:28:23 +01:00
parent dc349f5f7a
commit 201d99584e
1 changed files with 93 additions and 30 deletions

View File

@ -34,6 +34,10 @@
/* We never keep any item longer than 2h in our cache */
#define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR)
/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for
* now) */
#define CACHE_TTL_STRANGE_RCODE_USEC (30 * USEC_PER_SEC)
typedef enum DnsCacheItemType DnsCacheItemType;
typedef struct DnsCacheItem DnsCacheItem;
@ -41,12 +45,14 @@ enum DnsCacheItemType {
DNS_CACHE_POSITIVE,
DNS_CACHE_NODATA,
DNS_CACHE_NXDOMAIN,
DNS_CACHE_RCODE, /* "strange" RCODE (effective only SERVFAIL for now) */
};
struct DnsCacheItem {
DnsCacheItemType type;
DnsResourceKey *key;
DnsResourceRecord *rr;
int rcode;
usec_t until;
bool authenticated:1;
@ -60,6 +66,27 @@ struct DnsCacheItem {
LIST_FIELDS(DnsCacheItem, by_key);
};
static const char *dns_cache_item_type_to_string(DnsCacheItem *item) {
assert(item);
switch (item->type) {
case DNS_CACHE_POSITIVE:
return "POSITIVE";
case DNS_CACHE_NODATA:
return "NODATA";
case DNS_CACHE_NXDOMAIN:
return "NXDOMAIN";
case DNS_CACHE_RCODE:
return dns_rcode_to_string(item->rcode);
}
return NULL;
}
static void dns_cache_item_free(DnsCacheItem *i) {
if (!i)
return;
@ -484,7 +511,6 @@ static int dns_cache_put_negative(
assert(c);
assert(key);
assert(soa);
assert(owner_address);
/* Never cache pseudo RR keys. DNS_TYPE_ANY is particularly
@ -495,13 +521,17 @@ static int dns_cache_put_negative(
if (dns_type_is_pseudo(key->type))
return 0;
if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) {
log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s",
dns_resource_key_to_string(key, key_str, sizeof key_str));
return 0;
}
if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
if (!soa)
return 0;
if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN))
/* For negative replies, check if we have a TTL of a SOA */
if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) {
log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s",
dns_resource_key_to_string(key, key_str, sizeof key_str));
return 0;
}
} else if (rcode != DNS_RCODE_SERVFAIL)
return 0;
r = dns_cache_init(c);
@ -514,12 +544,17 @@ static int dns_cache_put_negative(
if (!i)
return -ENOMEM;
i->type = rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA : DNS_CACHE_NXDOMAIN;
i->until = calculate_until(soa, nsec_ttl, timestamp, true);
i->type =
rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA :
rcode == DNS_RCODE_NXDOMAIN ? DNS_CACHE_NXDOMAIN : DNS_CACHE_RCODE;
i->until =
i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC :
calculate_until(soa, nsec_ttl, timestamp, true);
i->authenticated = authenticated;
i->owner_family = owner_family;
i->owner_address = *owner_address;
i->prioq_idx = PRIOQ_IDX_NULL;
i->rcode = rcode;
if (i->type == DNS_CACHE_NXDOMAIN) {
/* NXDOMAIN entries should apply equally to all types, so we use ANY as
@ -543,7 +578,7 @@ static int dns_cache_put_negative(
return r;
log_debug("Added %s cache entry for %s "USEC_FMT"s",
i->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN",
dns_cache_item_type_to_string(i),
dns_resource_key_to_string(i->key, key_str, sizeof key_str),
(i->until - timestamp) / USEC_PER_SEC);
@ -615,6 +650,7 @@ int dns_cache_put(
const union in_addr_union *owner_address) {
DnsResourceRecord *soa = NULL, *rr;
bool weird_rcode = false;
DnsAnswerFlags flags;
unsigned cache_keys;
int r, ifindex;
@ -624,18 +660,28 @@ int dns_cache_put(
dns_cache_remove_previous(c, key, answer);
/* We only care for positive replies and NXDOMAINs, on all
* other replies we will simply flush the respective entries,
* and that's it */
if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN))
return 0;
/* We only care for positive replies and NXDOMAINs, on all other replies we will simply flush the respective
* entries, and that's it. (Well, with one further exception: since some DNS zones (akamai!) return SERVFAIL
* consistently for some lookups, and forwarders tend to propagate that we'll cache that too, but only for a
* short time.) */
if (dns_answer_size(answer) <= 0) {
char key_str[DNS_RESOURCE_KEY_STRING_MAX];
if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
log_debug("Not caching negative entry without a SOA record: %s",
dns_resource_key_to_string(key, key_str, sizeof key_str));
return 0;
if (dns_answer_size(answer) <= 0) {
char key_str[DNS_RESOURCE_KEY_STRING_MAX];
log_debug("Not caching negative entry without a SOA record: %s",
dns_resource_key_to_string(key, key_str, sizeof key_str));
return 0;
}
} else {
/* Only cache SERVFAIL as "weird" rcode for now. We can add more later, should that turn out to be
* beneficial. */
if (rcode != DNS_RCODE_SERVFAIL)
return 0;
weird_rcode = true;
}
cache_keys = dns_answer_size(answer);
@ -690,19 +736,20 @@ int dns_cache_put(
if (r > 0)
return 0;
/* See https://tools.ietf.org/html/rfc2308, which say that a
* matching SOA record in the packet is used to enable
* negative caching. */
/* See https://tools.ietf.org/html/rfc2308, which say that a matching SOA record in the packet is used to
* enable negative caching. We apply one exception though: if we are about to cache a weird rcode we do so
* regardless of a SOA. */
r = dns_answer_find_soa(answer, key, &soa, &flags);
if (r < 0)
goto fail;
if (r == 0)
return 0;
/* Refuse using the SOA data if it is unsigned, but the key is
* signed */
if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0)
if (r == 0 && !weird_rcode)
return 0;
if (r > 0) {
/* Refuse using the SOA data if it is unsigned, but the key is
* signed */
if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0)
return 0;
}
r = dns_cache_put_negative(
c,
@ -799,6 +846,7 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod
DnsCacheItem *j, *first, *nsec = NULL;
bool have_authenticated = false, have_non_authenticated = false;
usec_t current;
int found_rcode = -1;
assert(c);
assert(key);
@ -842,6 +890,8 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod
n++;
} else if (j->type == DNS_CACHE_NXDOMAIN)
nxdomain = true;
else if (j->type == DNS_CACHE_RCODE)
found_rcode = j->rcode;
if (j->authenticated)
have_authenticated = true;
@ -849,6 +899,19 @@ int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcod
have_non_authenticated = true;
}
if (found_rcode >= 0) {
log_debug("RCODE %s cache hit for %s",
dns_rcode_to_string(found_rcode),
dns_resource_key_to_string(key, key_str, sizeof(key_str)));
*ret = NULL;
*rcode = found_rcode;
*authenticated = false;
c->n_hit++;
return 1;
}
if (nsec && !IN_SET(key->type, DNS_TYPE_NSEC, DNS_TYPE_DS)) {
/* Note that we won't derive information for DS RRs from an NSEC, because we only cache NSEC RRs from
* the lower-zone of a zone cut, but the DS RRs are on the upper zone. */
@ -1042,7 +1105,7 @@ void dns_cache_dump(DnsCache *cache, FILE *f) {
fputs(dns_resource_key_to_string(j->key, key_str, sizeof key_str), f);
fputs(" -- ", f);
fputs(j->type == DNS_CACHE_NODATA ? "NODATA" : "NXDOMAIN", f);
fputs(dns_cache_item_type_to_string(j), f);
fputc('\n', f);
}
}