resolved: transaction - exponentially increase retry timeouts

Rather than fixing this to 5s for unicast DNS and 1s for LLMNR, start
at a tenth of those values and increase exponentially until the old
values are reached. For LLMNR the recommended timeout for IEEE802
networks (which basically means all of the ones we care about) is 100ms,
so that should be uncontroversial. For unicast DNS I have found no
recommended value. However, it seems vastly more likely that hitting a
500ms timeout is caused by packet loss, rather than the RTT genuinely
being greater than 500ms, so taking this as a starting value seems
reasonable to me.

In the common case this greatly reduces the latency due to normal packet
loss. Moreover, once we get support for probing for features, this means
that we can send more packets before degrading the feature level whilst
still allowing us to settle on the correct feature level in a reasonable
timeframe.

The timeouts are tracked per server (or per scope for the multicast
protocols), and once a server (or scope) receives a successful packet
the timeout is reset. We also track the largest RTT for the given
server/scope, and always start our timeouts at twice the largest
observed RTT.
This commit is contained in:
Tom Gundersen 2015-07-28 02:32:24 +02:00
parent 240b589b14
commit 9df3ba6c6c
6 changed files with 113 additions and 11 deletions

View File

@ -34,6 +34,10 @@
#define MULTICAST_RATELIMIT_INTERVAL_USEC (1*USEC_PER_SEC)
#define MULTICAST_RATELIMIT_BURST 1000
/* After how much time to repeat LLMNR requests, see RFC 4795 Section 7.
 * The per-scope resend timeout is adaptive: it is initialized to the MIN
 * value when a scope is created, and whenever it is recomputed (from the
 * largest observed RTT, or by doubling after a lost packet) the result is
 * capped at the MAX value. */
#define MULTICAST_RESEND_TIMEOUT_MIN_USEC (100 * USEC_PER_MSEC)
#define MULTICAST_RESEND_TIMEOUT_MAX_USEC (1 * USEC_PER_SEC)
int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol protocol, int family) {
DnsScope *s;
@ -48,6 +52,7 @@ int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol protocol, int
s->link = l;
s->protocol = protocol;
s->family = family;
s->resend_timeout = MULTICAST_RESEND_TIMEOUT_MIN_USEC;
LIST_PREPEND(scopes, m->dns_scopes, s);
@ -125,6 +130,23 @@ void dns_scope_next_dns_server(DnsScope *s) {
manager_next_dns_server(s->manager);
}
/* Account for a reply received on this scope.
 *
 * s:   the scope the reply arrived on (must not be NULL)
 * rtt: measured round-trip time of the answered transaction, in usec
 *
 * Remembers the largest RTT seen so far and resets the resend timeout to
 * twice that value, clamped to
 * [MULTICAST_RESEND_TIMEOUT_MIN_USEC, MULTICAST_RESEND_TIMEOUT_MAX_USEC].
 * The reset happens on every reply, so a timeout that was doubled by
 * dns_scope_packet_lost() drops back once the scope answers again. */
void dns_scope_packet_received(DnsScope *s, usec_t rtt) {
        assert(s);

        if (rtt > s->max_rtt)
                s->max_rtt = rtt;

        /* Compare against half the cap before doubling, so that a bogus,
         * huge RTT cannot make the multiplication wrap around and yield a
         * tiny timeout. */
        if (s->max_rtt >= MULTICAST_RESEND_TIMEOUT_MAX_USEC / 2)
                s->resend_timeout = MULTICAST_RESEND_TIMEOUT_MAX_USEC;
        else
                s->resend_timeout = MAX(MULTICAST_RESEND_TIMEOUT_MIN_USEC, s->max_rtt * 2);
}
/* Account for a transaction on this scope that timed out.
 *
 * s:    the scope the request was sent on (must not be NULL)
 * usec: how long the transaction had been waiting, in usec
 *
 * Doubles the resend timeout, capped at MULTICAST_RESEND_TIMEOUT_MAX_USEC,
 * but only if the wait was at least as long as the current timeout.
 * NOTE(review): presumably this keeps a transaction that was started under
 * an older, shorter timeout from bumping the value again -- confirm. */
void dns_scope_packet_lost(DnsScope *s, usec_t usec) {
        assert(s);

        if (usec < s->resend_timeout)
                return;

        s->resend_timeout = MIN(s->resend_timeout * 2, MULTICAST_RESEND_TIMEOUT_MAX_USEC);
}
int dns_scope_emit(DnsScope *s, int fd, DnsPacket *p) {
union in_addr_union addr;
int ifindex = 0, r;

View File

@ -57,6 +57,9 @@ struct DnsScope {
RateLimit ratelimit;
usec_t resend_timeout;
usec_t max_rtt;
LIST_HEAD(DnsTransaction, transactions);
LIST_FIELDS(DnsScope, scopes);
@ -65,6 +68,9 @@ struct DnsScope {
int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol p, int family);
DnsScope* dns_scope_free(DnsScope *s);
void dns_scope_packet_received(DnsScope *s, usec_t rtt);
void dns_scope_packet_lost(DnsScope *s, usec_t usec);
int dns_scope_emit(DnsScope *s, int fd, DnsPacket *p);
int dns_scope_tcp_socket(DnsScope *s, int family, const union in_addr_union *address, uint16_t port, DnsServer **server);
int dns_scope_udp_dns_socket(DnsScope *s, DnsServer **server);

View File

@ -23,6 +23,10 @@
#include "resolved-dns-server.h"
/* After how much time to repeat classic DNS requests.
 * The per-server resend timeout is adaptive: it is initialized to the MIN
 * value when a server is created, and whenever it is recomputed (from the
 * largest observed RTT, or by doubling after a lost packet) the result is
 * capped at the MAX value. */
#define DNS_TIMEOUT_MIN_USEC (500 * USEC_PER_MSEC)
#define DNS_TIMEOUT_MAX_USEC (5 * USEC_PER_SEC)
int dns_server_new(
Manager *m,
DnsServer **ret,
@ -45,6 +49,7 @@ int dns_server_new(
s->type = type;
s->family = family;
s->address = *in_addr;
s->resend_timeout = DNS_TIMEOUT_MIN_USEC;
if (type == DNS_SERVER_LINK) {
LIST_FIND_TAIL(servers, l->dns_servers, tail);
@ -115,6 +120,23 @@ DnsServer* dns_server_unref(DnsServer *s) {
return NULL;
}
/* Account for a reply received from this server.
 *
 * s:   the server that answered (must not be NULL)
 * rtt: measured round-trip time of the answered transaction, in usec
 *
 * Remembers the largest RTT seen so far and resets the resend timeout to
 * twice that value, clamped to [DNS_TIMEOUT_MIN_USEC, DNS_TIMEOUT_MAX_USEC].
 * The reset happens on every reply, so a timeout that was doubled by
 * dns_server_packet_lost() drops back once the server answers again. */
void dns_server_packet_received(DnsServer *s, usec_t rtt) {
        assert(s);

        if (rtt > s->max_rtt)
                s->max_rtt = rtt;

        /* Compare against half the cap before doubling, so that a bogus,
         * huge RTT cannot make the multiplication wrap around and yield a
         * tiny timeout. */
        if (s->max_rtt >= DNS_TIMEOUT_MAX_USEC / 2)
                s->resend_timeout = DNS_TIMEOUT_MAX_USEC;
        else
                s->resend_timeout = MAX(DNS_TIMEOUT_MIN_USEC, s->max_rtt * 2);
}
/* Account for a transaction to this server that timed out.
 *
 * s:    the server the request was sent to (must not be NULL)
 * usec: how long the transaction had been waiting, in usec
 *
 * Doubles the resend timeout, capped at DNS_TIMEOUT_MAX_USEC, but only if
 * the wait was at least as long as the current timeout.
 * NOTE(review): presumably this keeps a transaction that was started under
 * an older, shorter timeout from bumping the value again -- confirm. */
void dns_server_packet_lost(DnsServer *s, usec_t usec) {
        assert(s);

        if (usec < s->resend_timeout)
                return;

        s->resend_timeout = MIN(s->resend_timeout * 2, DNS_TIMEOUT_MAX_USEC);
}
static unsigned long dns_server_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
const DnsServer *s = p;
uint64_t u;

View File

@ -46,6 +46,9 @@ struct DnsServer {
int family;
union in_addr_union address;
usec_t resend_timeout;
usec_t max_rtt;
bool marked:1;
LIST_FIELDS(DnsServer, servers);
@ -62,6 +65,9 @@ int dns_server_new(
DnsServer* dns_server_ref(DnsServer *s);
DnsServer* dns_server_unref(DnsServer *s);
void dns_server_packet_received(DnsServer *s, usec_t rtt);
void dns_server_packet_lost(DnsServer *s, usec_t usec);
DEFINE_TRIVIAL_CLEANUP_FUNC(DnsServer*, dns_server_unref);
extern const struct hash_ops dns_server_hash_ops;

View File

@ -319,11 +319,14 @@ static void dns_transaction_next_dns_server(DnsTransaction *t) {
}
void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
usec_t ts;
int r;
assert(t);
assert(p);
assert(t->state == DNS_TRANSACTION_PENDING);
assert(t->scope);
assert(t->scope->manager);
/* Note that this call might invalidate the query. Callers
* should hence not attempt to access the query or transaction
@ -369,6 +372,26 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
}
}
r = sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts);
if (r < 0)
ts = now(clock_boottime_or_monotonic());
switch (t->scope->protocol) {
case DNS_PROTOCOL_DNS:
assert(t->server);
dns_server_packet_received(t->server, ts - t->start_usec);
break;
case DNS_PROTOCOL_LLMNR:
case DNS_PROTOCOL_MDNS:
dns_scope_packet_received(t->scope, ts - t->start_usec);
break;
default:
assert_not_reached("Invalid DNS protocol.");
}
if (DNS_PACKET_TC(p)) {
/* Response was truncated, let's try again with good old TCP */
r = dns_transaction_open_tcp(t);
@ -434,9 +457,9 @@ static int on_dns_packet(sd_event_source *s, int fd, uint32_t revents, void *use
return r;
if (dns_packet_validate_reply(p) > 0 &&
DNS_PACKET_ID(p) == t->id) {
DNS_PACKET_ID(p) == t->id)
dns_transaction_process_reply(t, p);
} else
else
log_debug("Invalid DNS packet.");
return 0;
@ -481,6 +504,12 @@ static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdat
/* Timeout reached? Try again, with a new server */
dns_transaction_next_dns_server(t);
/* ... and possibly increased timeout */
if (t->server)
dns_server_packet_lost(t->server, usec - t->start_usec);
else
dns_scope_packet_lost(t->scope, usec - t->start_usec);
r = dns_transaction_go(t);
if (r < 0)
dns_transaction_complete(t, DNS_TRANSACTION_RESOURCES);
@ -528,8 +557,26 @@ static int dns_transaction_make_packet(DnsTransaction *t) {
return 0;
}
/* Return the resend timeout that applies to this transaction.
 *
 * For classic DNS the timeout is tracked on the transaction's server, for
 * LLMNR and mDNS it is tracked on the scope. Any other protocol value is a
 * programming error. */
static usec_t transaction_get_resend_timeout(DnsTransaction *t) {
        assert(t);
        assert(t->scope);

        if (t->scope->protocol == DNS_PROTOCOL_DNS) {
                /* Unicast DNS: a server must have been picked already */
                assert(t->server);
                return t->server->resend_timeout;
        }

        if (t->scope->protocol == DNS_PROTOCOL_LLMNR ||
            t->scope->protocol == DNS_PROTOCOL_MDNS)
                return t->scope->resend_timeout;

        assert_not_reached("Invalid DNS protocol.");
}
int dns_transaction_go(DnsTransaction *t) {
bool had_stream;
usec_t ts;
int r;
assert(t);
@ -555,7 +602,12 @@ int dns_transaction_go(DnsTransaction *t) {
return 0;
}
r = sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts);
if (r < 0)
ts = now(clock_boottime_or_monotonic());
t->n_attempts++;
t->start_usec = ts;
t->received = dns_packet_unref(t->received);
t->cached = dns_answer_unref(t->cached);
t->cached_rcode = 0;
@ -600,7 +652,7 @@ int dns_transaction_go(DnsTransaction *t) {
t->scope->manager->event,
&t->timeout_event_source,
clock_boottime_or_monotonic(),
now(clock_boottime_or_monotonic()) + jitter,
ts + jitter,
LLMNR_JITTER_INTERVAL_USEC,
on_transaction_timeout, t);
if (r < 0)
@ -660,7 +712,7 @@ int dns_transaction_go(DnsTransaction *t) {
t->scope->manager->event,
&t->timeout_event_source,
clock_boottime_or_monotonic(),
now(clock_boottime_or_monotonic()) + TRANSACTION_TIMEOUT_USEC(t->scope->protocol), 0,
ts + transaction_get_resend_timeout(t), 0,
on_transaction_timeout, t);
if (r < 0)
return r;

View File

@ -58,6 +58,7 @@ struct DnsTransaction {
DnsAnswer *cached;
int cached_rcode;
usec_t start_usec;
sd_event_source *timeout_event_source;
unsigned n_attempts;
@ -95,12 +96,6 @@ void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state);
const char* dns_transaction_state_to_string(DnsTransactionState p) _const_;
DnsTransactionState dns_transaction_state_from_string(const char *s) _pure_;
/* After how much time to repeat classic DNS requests */
#define DNS_TRANSACTION_TIMEOUT_USEC (5 * USEC_PER_SEC)
/* After how much time to repeat LLMNR requests, see RFC 4795 Section 7 */
#define LLMNR_TRANSACTION_TIMEOUT_USEC (1 * USEC_PER_SEC)
/* LLMNR Jitter interval, see RFC 4795 Section 7 */
#define LLMNR_JITTER_INTERVAL_USEC (100 * USEC_PER_MSEC)
@ -110,5 +105,4 @@ DnsTransactionState dns_transaction_state_from_string(const char *s) _pure_;
/* Maximum attempts to send LLMNR requests, see RFC 4795 Section 2.7 */
#define LLMNR_TRANSACTION_ATTEMPTS_MAX 3
#define TRANSACTION_TIMEOUT_USEC(p) (p == DNS_PROTOCOL_LLMNR ? LLMNR_TRANSACTION_TIMEOUT_USEC : DNS_TRANSACTION_TIMEOUT_USEC)
/* Maximum number of send attempts for a transaction of protocol p. The
 * argument is parenthesized so that compound expressions (e.g. a ternary)
 * are compared as a whole. */
#define TRANSACTION_ATTEMPTS_MAX(p) ((p) == DNS_PROTOCOL_LLMNR ? LLMNR_TRANSACTION_ATTEMPTS_MAX : DNS_TRANSACTION_ATTEMPTS_MAX)