Merge pull request #11086 from poettering/nscd-cache-flush

flush nscd's caches when we register user/groups/hostnames
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2018-12-17 11:29:58 +01:00 committed by GitHub
commit 582de70f2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 254 additions and 7 deletions

View File

@ -10,6 +10,7 @@
#include "fileio.h"
#include "fs-util.h"
#include "io-util.h"
#include "nscd-flush.h"
#include "parse-util.h"
#include "random-util.h"
#include "serialize.h"
@ -383,6 +384,7 @@ static int dynamic_user_realize(
_cleanup_close_ int etc_passwd_lock_fd = -1;
uid_t num = UID_INVALID; /* a uid if is_user, and a gid otherwise */
gid_t gid = GID_INVALID; /* a gid if is_user, ignored otherwise */
bool flush_cache = false;
int r;
assert(d);
@ -471,6 +473,7 @@ static int dynamic_user_realize(
}
/* Great! Nothing is stored here, still. Store our newly acquired data. */
flush_cache = true;
} else {
/* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
* acquired, and use what's stored now. */
@ -500,6 +503,14 @@ static int dynamic_user_realize(
if (r < 0)
return r;
if (flush_cache) {
/* If we allocated a new dynamic UID, refresh nscd, so that it forgets about potentially cached
* negative entries. But let's do so after we release the /etc/passwd lock, so that there's no
* potential for nscd wanting to lock that for completing the invalidation. */
etc_passwd_lock_fd = safe_close(etc_passwd_lock_fd);
(void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
}
if (is_user) {
*ret_uid = num;
*ret_gid = gid != GID_INVALID ? gid : num;
@ -572,6 +583,8 @@ static int dynamic_user_close(DynamicUser *d) {
/* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
unlink_uid_lock(lock_fd, uid, d->name);
(void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
return 1;
}

View File

@ -18,6 +18,7 @@
#include "id128-util.h"
#include "main-func.h"
#include "missing_capability.h"
#include "nscd-flush.h"
#include "os-util.h"
#include "parse-util.h"
#include "path-util.h"
@ -290,6 +291,8 @@ static int context_update_kernel_hostname(Context *c) {
if (sethostname_idempotent(hn) < 0)
return -errno;
(void) nscd_flush_cache(STRV_MAKE("hosts"));
return 0;
}

View File

@ -398,6 +398,7 @@ int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
machine_save(m);
machine_send_signal(m, true);
(void) manager_enqueue_nscd_cache_flush(m->manager);
return 0;
}
@ -439,6 +440,7 @@ int machine_stop(Machine *m) {
m->stopping = true;
machine_save(m);
(void) manager_enqueue_nscd_cache_flush(m->manager);
return r;
}

View File

@ -0,0 +1,36 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "machined.h"
#include "nscd-flush.h"
#include "strv.h"
/* Deferred event handler: asks nscd to drop its cached entries. Always returns 0; the outcome of the flush
 * itself is deliberately ignored, as this is a best-effort operation. */
static int on_nscd_cache_flush_event(sd_event_source *s, void *userdata) {
        /* Machines may surface in three of the databases glibc's nscd daemon caches: "hosts" (resolvable
         * machine names) as well as "passwd" and "group" (the user namespace UID/GID ranges). Hence request
         * invalidation of all three. */
        char **databases = STRV_MAKE("passwd", "group", "hosts");

        (void) nscd_flush_cache(databases);

        return 0;
}
/* Schedules an nscd cache flush to be run from the manager's event loop.
 *
 * The deferred event source is allocated on first use and kept in m->nscd_cache_flush_event; later calls only
 * re-arm it in one-shot mode. Returns 0 on success and a negative errno-style value (already logged) on
 * failure. */
int manager_enqueue_nscd_cache_flush(Manager *m) {
        int r;

        assert(m);

        if (m->nscd_cache_flush_event == NULL) {
                /* First request: set up the event source that will invoke on_nscd_cache_flush_event(). */
                r = sd_event_add_defer(m->event, &m->nscd_cache_flush_event, on_nscd_cache_flush_event, m);
                if (r < 0)
                        return log_error_errno(r, "Failed to allocate NSCD cache flush event: %m");

                sd_event_source_set_description(m->nscd_cache_flush_event, "nscd-cache-flush");
        }

        r = sd_event_source_set_enabled(m->nscd_cache_flush_event, SD_EVENT_ONESHOT);
        if (r >= 0)
                return 0;

        /* Arming failed: drop our reference and reset the pointer, so that the next call allocates a fresh
         * event source instead of reusing this one. */
        m->nscd_cache_flush_event = sd_event_source_unref(m->nscd_cache_flush_event);

        return log_error_errno(r, "Failed to enable NSCD cache flush event: %m");
}

View File

@ -25,8 +25,7 @@
static Manager* manager_unref(Manager *m);
DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(machine_hash_ops, void, trivial_hash_func, trivial_compare_func,
Machine, machine_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(machine_hash_ops, char, string_hash_func, string_compare_func, Machine, machine_free);
static int manager_new(Manager **ret) {
_cleanup_(manager_unrefp) Manager *m = NULL;
@ -38,9 +37,9 @@ static int manager_new(Manager **ret) {
if (!m)
return -ENOMEM;
m->machines = hashmap_new(&string_hash_ops);
m->machines = hashmap_new(&machine_hash_ops);
m->machine_units = hashmap_new(&string_hash_ops);
m->machine_leaders = hashmap_new(&machine_hash_ops);
m->machine_leaders = hashmap_new(NULL);
if (!m->machines || !m->machine_units || !m->machine_leaders)
return -ENOMEM;
@ -72,12 +71,13 @@ static Manager* manager_unref(Manager *m) {
assert(m->n_operations == 0);
hashmap_free(m->machines);
hashmap_free(m->machines); /* This will free all machines, so that the machine_units/machine_leaders is empty */
hashmap_free(m->machine_units);
hashmap_free(m->machine_leaders);
hashmap_free(m->image_cache);
sd_event_source_unref(m->image_cache_defer_event);
sd_event_source_unref(m->nscd_cache_flush_event);
bus_verify_polkit_async_registry_free(m->polkit_registry);

View File

@ -35,6 +35,8 @@ struct Manager {
LIST_HEAD(Operation, operations);
unsigned n_operations;
sd_event_source *nscd_cache_flush_event;
};
int manager_add_machine(Manager *m, const char *name, Machine **_machine);
@ -53,3 +55,5 @@ int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_erro
int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
int manager_unit_is_active(Manager *manager, const char *unit);
int manager_job_is_active(Manager *manager, const char *path);
int manager_enqueue_nscd_cache_flush(Manager *m);

View File

@ -9,6 +9,7 @@ libmachine_core_sources = files('''
machine.c
machine.h
machined-dbus.c
machined-core.c
machine-dbus.c
machine-dbus.h
image-dbus.c

View File

@ -82,6 +82,8 @@ shared_sources = files('''
install-printf.h
install.c
install.h
ip-protocol-list.c
ip-protocol-list.h
journal-importer.c
journal-importer.h
journal-util.c
@ -103,6 +105,8 @@ shared_sources = files('''
module-util.h
mount-util.c
mount-util.h
nscd-flush.c
nscd-flush.h
nsflags.c
nsflags.h
os-util.c
@ -128,8 +132,6 @@ shared_sources = files('''
serialize.h
sleep-config.c
sleep-config.h
ip-protocol-list.c
ip-protocol-list.h
spawn-ask-password-agent.c
spawn-ask-password-agent.h
spawn-polkit-agent.c

151
src/shared/nscd-flush.c Normal file
View File

@ -0,0 +1,151 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <sys/poll.h>
#include "fd-util.h"
#include "io-util.h"
#include "nscd-flush.h"
#include "socket-util.h"
#include "strv.h"
#include "time-util.h"
#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Wire format of the invalidation request we send to nscd: a fixed header of three 32-bit integers, followed
 * directly by the name of the database to invalidate. */
struct nscdInvalidateRequest {
        int32_t version;  /* Protocol version. */
        int32_t type;     /* Request type. In glibc this is an enum, which we don't replicate 1:1 here; a
                           * fixed-width integer is used instead.
                           * NOTE(review): relies on glibc's enum being 32 bits wide, confirm per ABI. */
        int32_t key_len;  /* Number of bytes in dbname[]. */
        char dbname[];    /* Database name (flexible array member, sized at allocation time). */
};
/* AF_UNIX socket address (file system path) of the nscd socket we talk to. */
static const union sockaddr_union nscd_sa = {
        .un.sun_family = AF_UNIX,
        .un.sun_path = "/run/nscd/socket",
};
/* Asks nscd to invalidate a single database.
 *
 * database: name of the nscd database to invalidate (e.g. "passwd"); must not be NULL.
 * end:      absolute CLOCK_MONOTONIC deadline; once reached we give up.
 *
 * All socket I/O is non-blocking and bounded by the deadline, so that an unresponsive nscd cannot stall us.
 *
 * Returns 1 if the request was delivered and nscd either acknowledged it with a zero status or closed the
 * connection right after receiving it. Returns a negative errno-style value on failure (-ETIMEDOUT if the
 * deadline passed). Every failure is logged at debug level. */
static int nscd_flush_cache_one(const char *database, usec_t end) {
        size_t req_size, has_written = 0, has_read = 0, l;
        struct nscdInvalidateRequest *req;
        _cleanup_close_ int fd = -1;
        int32_t resp;
        int events;

        assert(database);

        /* The request consists of the fixed header plus the database name, including its trailing NUL. */
        l = strlen(database);
        req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1;

        req = alloca(req_size);
        *req = (struct nscdInvalidateRequest) {
                /* NOTE(review): presumably glibc's nscd protocol version and its "invalidate" request type,
                 * confirm against glibc's nscd-client.h. */
                .version = 2,
                .type = 10,
                .key_len = l + 1,
        };

        strcpy(req->dbname, database);

        fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
        if (fd < 0)
                return log_debug_errno(errno, "Failed to allocate nscd socket: %m");

        /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The
         * kernel lets us know this way that the connection is now being established, and we should watch with poll()
         * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is
         * always instant on AF_UNIX), hence handling this is mostly just an exercise in defensive, protocol-agnostic
         * programming.
         *
         * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right
         * away, after all this entire function here is written in a defensive style so that a non-responding nscd
         * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a
         * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the
         * backlog limit. After all SO_RCVTIMEO and SO_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly
         * and not really reasonably usable from threads-aware code.) */
        if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) {
                if (errno == EAGAIN)
                        return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m");
                if (errno != EINPROGRESS)
                        return log_debug_errno(errno, "Failed to connect to nscd socket: %m");

                /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that
                 * establishing the connection is complete. */
                events = 0;
        } else
                events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress
                                          * one poll() invocation */

        for (;;) {
                usec_t p;

                if (events & POLLOUT) {
                        ssize_t m;

                        /* We only ever ask for POLLOUT while there's still something left to send. */
                        assert(has_written < req_size);

                        m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL);
                        if (m < 0) {
                                if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't
                                                      * take the data right now, and that includes if the connect() is
                                                      * asynchronous and we saw EINPROGRESS on it, and it hasn't
                                                      * completed yet. */
                                        return log_debug_errno(errno, "Failed to write to nscd socket: %m");
                        } else
                                has_written += m;
                }

                if (events & (POLLIN|POLLERR|POLLHUP)) {
                        ssize_t m;

                        /* We get here with a complete response only if the request is not fully written yet,
                         * (otherwise we'd have returned below already), i.e. nscd is sending more than we
                         * expect. */
                        if (has_read >= sizeof(resp))
                                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m");

                        m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0);
                        if (m < 0) {
                                if (errno != EAGAIN)
                                        return log_debug_errno(errno, "Failed to read from nscd socket: %m");
                        } else if (m == 0) { /* EOF */
                                if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the
                                                                               * connection, accept that as OK */
                                        return 1;

                                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection.");
                        } else
                                has_read += m;
                }

                if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */
                        if (resp < 0)
                                return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error number: %i", resp);
                        if (resp > 0)
                                return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database);
                        return 1;
                }

                p = now(CLOCK_MONOTONIC);
                if (p >= end)
                        /* Log this like every other failure path, a hanging nscd is the most likely way for
                         * this function to fail after all. */
                        return log_debug_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Timed out while invalidating '%s' in nscd.", database);

                /* Wait for the socket to become readable, and also writable if there's still data to send. */
                events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p);
                if (events < 0)
                        return log_debug_errno(events, "Failed to wait for nscd socket: %m");
        }
}
/* Tries to invalidate each of the specified databases in nscd.
 *
 * databases: NULL-terminated string vector of database names.
 *
 * All databases share a single deadline, NSCD_FLUSH_CACHE_TIMEOUT_USEC from now, so that we never block
 * indefinitely on another service. A failure for one database does not stop us from trying the remaining
 * ones. Returns 0 if all invalidations succeeded, otherwise the (negative) error of the first one that
 * failed. */
int nscd_flush_cache(char **databases) {
        int first_error = 0;
        usec_t deadline;
        char **db;

        deadline = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC);

        STRV_FOREACH(db, databases) {
                int q = nscd_flush_cache_one(*db, deadline);

                if (q >= 0)
                        continue;

                /* Remember only the first failure, but keep going. */
                if (first_error >= 0)
                        first_error = q;
        }

        return first_error;
}

4
src/shared/nscd-flush.h Normal file
View File

@ -0,0 +1,4 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
int nscd_flush_cache(char **databases);

View File

@ -94,6 +94,17 @@ tests += [
libblkid],
'', 'manual'],
[['src/test/test-nscd-flush.c'],
[libcore,
libshared],
[threads,
librt,
libseccomp,
libselinux,
libmount,
libblkid],
'', 'manual'],
[['src/test/test-loopback.c'],
[libcore,
libshared],

View File

@ -0,0 +1,20 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "main-func.h"
#include "nscd-flush.h"
#include "strv.h"
#include "tests.h"
/* Manual test: asks a running nscd to flush its "group", "passwd" and "hosts" caches, with debug logging
 * enabled. Returns 0 on success and the negative errno-style error from nscd_flush_cache() on failure. */
static int run(int argc, char *argv[]) {
        int r;

        test_setup_logging(LOG_DEBUG);

        r = nscd_flush_cache(STRV_MAKE("group", "passwd", "hosts"));
        if (r < 0)
                /* Include %m, so that the actual failure reason is part of the error message. */
                return log_error_errno(r, "Failed to flush NSCD cache: %m");

        return 0;
}
DEFINE_MAIN_FUNCTION(run);