Cache network interface information

Whenever getaddrinfo needed network interface information it used the
netlink interface to read the information every single time.  The
problem is that this information can change at any time.

The patch implements monitoring of the network interfaces through
nscd.  If no change is detected the previously read information can
be reused (which is the norm).  This timestamp information is also
made available to other processes using the shared memory segment
between nscd and those processes.
This commit is contained in:
Ulrich Drepper 2011-10-31 01:51:16 -04:00
parent 636064eb4c
commit 3a2c02424d
10 changed files with 359 additions and 46 deletions

View file

@ -1,3 +1,29 @@
2011-10-31 Ulrich Drepper <drepper@gmail.com>
* include/ifaddrs.h: Declare __free_in6ai and __bump_nl_timestamp.
* inet/check_pf.c: Provide dummy versions of __free_in6ai and
__bump_nl_timestamp.
* nscd/connections (nscd_init): When host database is served open
netlink socket and request notification about configuration changes.
(main_loop_poll): Track netlink file descriptor and bump timestamp
in case data becomes available.
(main_loop_epoll): Likewise.
* nscd/nscd-client.h (DB_VERSION): Bump to 2.
(database_pers_head): Add extra_data fileds.
Declare __nscd_get_mapping and __nscd_get_nl_timestamp.
* nscd/nscd_gethst_r.c (__nscd_get_nl_timestamp): New function.
* nscd/nscd_helper.c (__nscd_get_mapping): Renamed from get_mapping.
Adjust caller.
* sysdeps/posix/getaddrinfo.c (getaddrinfo): Don't call free on
in6ai data, call __free_in6ai.
* sysdeps/unix/sysv/linux/Makefile [subdir=nscd] (sysdep-CFLAGS):
Add -DHAVE_NETLINK.
* sysdeps/unix/sysv/linux/check_pf.c: Major rewrite. Cache the
interface information. Reuse previous data if netlink timestamp
is not changed.
(__bump_nl_timestamp): New function.
(__free_in6ai): New function.
2011-10-30 Ulrich Drepper <drepper@gmail.com>
* sysdeps/unix/sysv/linux/check_pf.c (make_request): Don't call

View file

@ -21,8 +21,13 @@ struct in6addrinfo
extern void __check_pf (bool *seen_ipv4, bool *seen_ipv6,
struct in6addrinfo **in6ai, size_t *in6ailen)
attribute_hidden;
extern void __free_in6ai (struct in6addrinfo *in6ai) attribute_hidden;
extern void __check_native (uint32_t a1_index, int *a1_native,
uint32_t a2_index, int *a2_native)
attribute_hidden;
#ifdef IS_IN_nscd
extern uint32_t __bump_nl_timestamp (void) attribute_hidden;
#endif
#endif /* ifaddrs.h */

View file

@ -1,5 +1,5 @@
/* Determine protocol families for which interfaces exist. Generic version.
Copyright (C) 2003, 2006 Free Software Foundation, Inc.
Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -54,3 +54,19 @@ __check_pf (bool *seen_ipv4, bool *seen_ipv6,
(void) freeifaddrs (ifa);
}
void
__free_in6ai (struct in6addrinfo *in6ai)
{
/* Nothing to do. */
}
#ifdef IS_IN_nscd
uint32_t
__bump_nl_timestamp (void)
{
return 0;
}
#endif

View file

@ -24,6 +24,7 @@
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <ifaddrs.h>
#include <libintl.h>
#include <pthread.h>
#include <pwd.h>
@ -32,6 +33,9 @@
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#ifdef HAVE_NETLINK
# include <netlink/netlink.h>
#endif
#ifdef HAVE_EPOLL
# include <sys/epoll.h>
#endif
@ -247,6 +251,11 @@ static int sock;
int inotify_fd = -1;
#endif
#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates. */
static int nl_status_fd = -1;
#endif
#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
before be know the result. */
@ -903,6 +912,65 @@ cannot set socket to close on exec: %s; disabling paranoia mode"),
exit (1);
}
#ifdef HAVE_NETLINK
if (dbs[hstdb].enabled)
{
/* Try to open netlink socket to monitor network setting changes. */
nl_status_fd = socket (AF_NETLINK,
SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
NETLINK_ROUTE);
if (nl_status_fd != -1)
{
struct sockaddr_nl snl;
memset (&snl, '\0', sizeof (snl));
snl.nl_family = AF_NETLINK;
/* XXX Is this the best set to use? */
snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
| RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
| RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
| RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
| RTMGRP_IPV6_PREFIX);
if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
{
close (nl_status_fd);
nl_status_fd = -1;
}
else
{
/* Start the timestamp process. */
dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
= __bump_nl_timestamp ();
# ifndef __ASSUME_SOCK_CLOEXEC
if (have_sock_cloexec < 0)
{
/* We don't want to get stuck on accept. */
int fl = fcntl (nl_status_fd, F_GETFL);
if (fl == -1
|| fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
{
dbg_log (_("\
cannot change socket to nonblocking mode: %s"),
strerror (errno));
exit (1);
}
/* The descriptor needs to be closed on exec. */
if (paranoia
&& fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
{
dbg_log (_("cannot set socket to close on exec: %s"),
strerror (errno));
exit (1);
}
}
# endif
}
}
}
#endif
/* Change to unprivileged uid/gid/groups if specified in config file */
if (server_user != NULL)
finish_drop_privileges ();
@ -1826,6 +1894,18 @@ main_loop_poll (void)
}
#endif
#ifdef HAVE_NETLINK
size_t idx_nl_status_fd = 0;
if (nl_status_fd != -1)
{
idx_nl_status_fd = nused;
conns[nused].fd = nl_status_fd;
conns[nused].events = POLLRDNORM;
++nused;
firstfree = nused;
}
#endif
while (1)
{
/* Wait for any event. We wait at most a couple of seconds so
@ -1968,6 +2048,20 @@ disabled inotify after read error %d"),
}
#endif
#ifdef HAVE_NETLINK
if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
{
char buf[4096];
/* Read all the data. We do not interpret it here. */
while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
sizeof (buf))) != -1)
;
dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
= __bump_nl_timestamp ();
}
#endif
for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
if (conns[cnt].revents != 0)
{
@ -2046,6 +2140,17 @@ main_loop_epoll (int efd)
}
# endif
# ifdef HAVE_NETLINK
if (nl_status_fd != -1)
{
ev.events = EPOLLRDNORM;
ev.data.fd = nl_status_fd;
if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
/* We cannot use epoll. */
return;
}
# endif
while (1)
{
struct epoll_event revs[100];
@ -2161,6 +2266,18 @@ main_loop_epoll (int efd)
pthread_cond_signal (&dbs[dbcnt].prune_cond);
}
}
# endif
# ifdef HAVE_NETLINK
else if (revs[cnt].data.fd == nl_status_fd)
{
char buf[4096];
/* Read all the data. We do not interpret it here. */
while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
sizeof (buf))) != -1)
;
__bump_nl_timestamp ();
}
# endif
else
{
@ -2185,6 +2302,7 @@ main_loop_epoll (int efd)
time_t laststart = now - ACCEPT_TIMEOUT;
assert (starttime[sock] == 0);
assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
if (starttime[cnt] != 0 && starttime[cnt] < laststart)
{

View file

@ -260,12 +260,17 @@ struct hashentry
/* Current persistent database version. */
#define DB_VERSION 1
#define DB_VERSION 2
/* Maximum time allowed between updates of the timestamp. */
#define MAPPING_TIMEOUT (5 * 60)
/* Used indices for the EXTRA_DATA element of 'database_pers_head'.
Each database has its own indices. */
#define NSCD_HST_IDX_CONF_TIMESTAMP 0
/* Header of persistent database file. */
struct database_pers_head
{
@ -274,6 +279,8 @@ struct database_pers_head
volatile int32_t gc_cycle;
volatile int32_t nscd_certainly_running;
volatile nscd_time_t timestamp;
/* Room for extensions. */
volatile uint32_t extra_data[4];
nscd_ssize_t module;
nscd_ssize_t data_size;
@ -322,6 +329,12 @@ extern int __nscd_open_socket (const char *key, size_t keylen,
request_type type, void *response,
size_t responselen) attribute_hidden;
/* Try to get a file descriptor for the shared meory segment
containing the database. */
extern struct mapped_database *__nscd_get_mapping (request_type type,
const char *key,
struct mapped_database **mappedp) attribute_hidden;
/* Get reference of mapping. */
extern struct mapped_database *__nscd_get_map_ref (request_type type,
const char *name,
@ -371,4 +384,7 @@ extern ssize_t writeall (int fd, const void *buf, size_t len)
extern ssize_t sendfileall (int tofd, int fromfd, off_t off, size_t len)
attribute_hidden;
/* Get netlink timestamp counter from mapped area or zero. */
extern uint32_t __nscd_get_nl_timestamp (void);
#endif /* nscd.h */

View file

@ -1,4 +1,4 @@
/* Copyright (C) 1998-2005, 2006, 2007, 2008, 2009
/* Copyright (C) 1998-2005, 2006, 2007, 2008, 2009, 2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@ -98,6 +98,27 @@ libc_freeres_fn (hst_map_free)
}
uint32_t
__nscd_get_nl_timestamp (void)
{
if (__nss_not_use_nscd_hosts != 0)
return 0;
struct mapped_database *map = __hst_map_handle.mapped;
if (map == NULL
|| (map != NO_MAPPING
&& map->head->nscd_certainly_running == 0
&& map->head->timestamp + MAPPING_TIMEOUT < time (NULL)))
map = __nscd_get_mapping (GETFDHST, "hosts", &__hst_map_handle.mapped);
if (map == NO_MAPPING)
return 0;
return map->head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP];
}
int __nss_have_localdomain attribute_hidden;
static int

View file

@ -277,9 +277,9 @@ __nscd_unmap (struct mapped_database *mapped)
/* Try to get a file descriptor for the shared meory segment
containing the database. */
static struct mapped_database *
get_mapping (request_type type, const char *key,
struct mapped_database **mappedp)
struct mapped_database *
__nscd_get_mapping (request_type type, const char *key,
struct mapped_database **mappedp)
{
struct mapped_database *result = NO_MAPPING;
#ifdef SCM_RIGHTS
@ -449,8 +449,8 @@ __nscd_get_map_ref (request_type type, const char *name,
|| (cur->head->nscd_certainly_running == 0
&& cur->head->timestamp + MAPPING_TIMEOUT < time (NULL))
|| cur->head->data_size > cur->datasize)
cur = get_mapping (type, name,
(struct mapped_database **) &mapptr->mapped);
cur = __nscd_get_mapping (type, name,
(struct mapped_database **) &mapptr->mapped);
if (__builtin_expect (cur != NO_MAPPING, 1))
{

View file

@ -2386,7 +2386,7 @@ getaddrinfo (const char *name, const char *service,
|| (hints->ai_family == PF_INET6 && ! seen_ipv6))
{
/* We cannot possibly return a valid answer. */
free (in6ai);
__free_in6ai (in6ai);
return EAI_NONAME;
}
}
@ -2400,7 +2400,7 @@ getaddrinfo (const char *name, const char *service,
{
if (hints->ai_flags & AI_NUMERICSERV)
{
free (in6ai);
__free_in6ai (in6ai);
return EAI_NONAME;
}
@ -2422,7 +2422,7 @@ getaddrinfo (const char *name, const char *service,
if (last_i != 0)
{
freeaddrinfo (p);
free (in6ai);
__free_in6ai (in6ai);
return -(last_i & GAIH_EAI);
}
@ -2434,7 +2434,7 @@ getaddrinfo (const char *name, const char *service,
}
else
{
free (in6ai);
__free_in6ai (in6ai);
return EAI_FAMILY;
}
@ -2622,7 +2622,7 @@ getaddrinfo (const char *name, const char *service,
p->ai_canonname = canonname;
}
free (in6ai);
__free_in6ai (in6ai);
if (p)
{

View file

@ -161,6 +161,6 @@ CFLAGS-mq_receive.c += -fexceptions
endif
ifeq ($(subdir),nscd)
sysdep-CFLAGS += -DHAVE_EPOLL -DHAVE_SENDFILE -DHAVE_INOTIFY
sysdep-CFLAGS += -DHAVE_EPOLL -DHAVE_SENDFILE -DHAVE_INOTIFY -DHAVE_NETLINK
CFLAGS-gai.c += -DNEED_NETLINK
endif

View file

@ -1,5 +1,5 @@
/* Determine protocol families for which interfaces exist. Linux version.
Copyright (C) 2003, 2006, 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
Copyright (C) 2003, 2006-2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -33,6 +33,9 @@
#include <not-cancel.h>
#include <kernel-features.h>
#include <bits/libc-lock.h>
#include <atomic.h>
#include <nscd/nscd-client.h>
#ifndef IFA_F_HOMEADDRESS
@ -43,9 +46,42 @@
#endif
static int
make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
struct in6addrinfo **in6ai, size_t *in6ailen)
struct cached_data
{
uint32_t timestamp;
uint32_t usecnt;
bool seen_ipv4;
bool seen_ipv6;
size_t in6ailen;
struct in6addrinfo in6ai[0];
};
static struct cached_data noai6ai_cached =
{
.usecnt = 3, /* Make sure we never try to delete this entry. */
.in6ailen = 0
};
static struct cached_data *cache;
__libc_lock_define_initialized (static, lock);
#ifdef IS_IN_nscd
static uint32_t nl_timestamp;
uint32_t
__bump_nl_timestamp (void)
{
if (atomic_increment_val (&nl_timestamp) == 0)
atomic_increment (&nl_timestamp);
return nl_timestamp;
}
#endif
static struct cached_data *
make_request (int fd, pid_t pid)
{
struct req
{
@ -99,9 +135,6 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
sizeof (nladdr))) < 0)
goto out_fail;
*seen_ipv4 = false;
*seen_ipv6 = false;
bool done = false;
struct in6ailist
{
@ -109,6 +142,8 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
struct in6ailist *next;
} *in6ailist = NULL;
size_t in6ailistlen = 0;
bool seen_ipv4 = false;
bool seen_ipv6 = false;
do
{
@ -172,12 +207,12 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
{
if (*(const in_addr_t *) address
!= htonl (INADDR_LOOPBACK))
*seen_ipv4 = true;
seen_ipv4 = true;
}
else
{
if (!IN6_IS_ADDR_LOOPBACK (address))
*seen_ipv6 = true;
seen_ipv6 = true;
}
}
@ -211,30 +246,46 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6,
}
while (! done);
if (*seen_ipv6 && in6ailist != NULL)
struct cached_data *result;
if (seen_ipv6 && in6ailist != NULL)
{
*in6ai = malloc (in6ailistlen * sizeof (**in6ai));
if (*in6ai == NULL)
result = malloc (sizeof (*result)
+ in6ailistlen * sizeof (struct in6addrinfo));
if (result == NULL)
goto out_fail;
*in6ailen = in6ailistlen;
#ifdef IS_IN_nscd
result->timestamp = nl_timestamp;
#else
result->timestamp = __nscd_get_nl_timestamp ();
#endif
result->usecnt = 2;
result->seen_ipv4 = seen_ipv4;
result->seen_ipv6 = true;
result->in6ailen = in6ailistlen;
do
{
(*in6ai)[--in6ailistlen] = in6ailist->info;
result->in6ai[--in6ailistlen] = in6ailist->info;
in6ailist = in6ailist->next;
}
while (in6ailist != NULL);
}
else
{
noai6ai_cached.seen_ipv4 = seen_ipv4;
noai6ai_cached.seen_ipv6 = seen_ipv6;
result = &noai6ai_cached;
}
if (use_malloc)
free (buf);
return 0;
return result;
out_fail:
if (use_malloc)
free (buf);
return -1;
return NULL;
}
@ -258,28 +309,65 @@ __check_pf (bool *seen_ipv4, bool *seen_ipv6,
if (! __no_netlink_support)
{
int fd = __socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
struct cached_data *olddata = NULL;
struct cached_data *data = NULL;
if (__builtin_expect (fd >= 0, 1))
__libc_lock_lock (lock);
#ifdef IS_IN_nscd
# define cache_valid() nl_timestamp != 0 && cache->timestamp == nl_timestamp
#else
# define cache_valid() \
({ uint32_t val = __nscd_get_nl_timestamp (); \
val != 0 && cache->timestamp == val; })
#endif
if (cache != NULL && cache_valid ())
{
struct sockaddr_nl nladdr;
memset (&nladdr, '\0', sizeof (nladdr));
nladdr.nl_family = AF_NETLINK;
data = cache;
atomic_increment (&cache->usecnt);
}
else
{
int fd = __socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
socklen_t addr_len = sizeof (nladdr);
if (__builtin_expect (fd >= 0, 1))
{
struct sockaddr_nl nladdr;
memset (&nladdr, '\0', sizeof (nladdr));
nladdr.nl_family = AF_NETLINK;
bool success
= (__bind (fd, (struct sockaddr *) &nladdr, sizeof (nladdr)) == 0
&& __getsockname (fd, (struct sockaddr *) &nladdr,
&addr_len) == 0
&& make_request (fd, nladdr.nl_pid, seen_ipv4, seen_ipv6,
in6ai, in6ailen) == 0);
socklen_t addr_len = sizeof (nladdr);
close_not_cancel_no_status (fd);
if(__bind (fd, (struct sockaddr *) &nladdr, sizeof (nladdr)) == 0
&& __getsockname (fd, (struct sockaddr *) &nladdr,
&addr_len) == 0)
data = make_request (fd, nladdr.nl_pid);
if (success)
/* It worked. */
return;
close_not_cancel_no_status (fd);
}
if (data != NULL)
{
olddata = cache;
cache = data;
}
}
__libc_lock_unlock (lock);
if (data != NULL)
{
/* It worked. */
*seen_ipv4 = data->seen_ipv4;
*seen_ipv6 = data->seen_ipv6;
*in6ailen = data->in6ailen;
*in6ai = data->in6ai;
if (olddata != NULL && olddata->usecnt > 0
&& atomic_add_zero (&olddata->usecnt, -1))
free (olddata);
return;
}
#if __ASSUME_NETLINK_SUPPORT == 0
@ -318,3 +406,26 @@ __check_pf (bool *seen_ipv4, bool *seen_ipv6,
(void) freeifaddrs (ifa);
#endif
}
void
__free_in6ai (struct in6addrinfo *ai)
{
if (ai != NULL)
{
struct cached_data *data =
(struct cached_data *) ((char *) ai
- offsetof (struct cached_data, in6ai));
if (atomic_add_zero (&data->usecnt, -1))
{
__libc_lock_lock (lock);
if (data->usecnt == 0)
/* Still unused. */
free (data);
__libc_lock_unlock (lock);
}
}
}