From a82f89aa9e099d403d391d9ae0afd2c391fd83f8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 May 2018 17:05:07 +0200 Subject: [PATCH 01/14] sd-event: use structure initialization for epoll_event --- src/libsystemd/sd-event/sd-event.c | 35 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index c02783cb8e..0de44e9124 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -518,18 +518,17 @@ static int source_io_register( int enabled, uint32_t events) { - struct epoll_event ev = {}; + struct epoll_event ev; int r; assert(s); assert(s->type == SOURCE_IO); assert(enabled != SD_EVENT_OFF); - ev.events = events; - ev.data.ptr = s; - - if (enabled == SD_EVENT_ONESHOT) - ev.events |= EPOLLONESHOT; + ev = (struct epoll_event) { + .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0), + .data.ptr = s, + }; if (s->io.registered) r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev); @@ -621,7 +620,7 @@ static int event_make_signal_data( int sig, struct signal_data **ret) { - struct epoll_event ev = {}; + struct epoll_event ev; struct signal_data *d; bool added = false; sigset_t ss_copy; @@ -686,8 +685,10 @@ static int event_make_signal_data( d->fd = fd_move_above_stdio(r); - ev.events = EPOLLIN; - ev.data.ptr = d; + ev = (struct epoll_event) { + .events = EPOLLIN, + .data.ptr = d, + }; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev); if (r < 0) { @@ -1021,7 +1022,7 @@ static int event_setup_timer_fd( struct clock_data *d, clockid_t clock) { - struct epoll_event ev = {}; + struct epoll_event ev; int r, fd; assert(e); @@ -1036,8 +1037,10 @@ static int event_setup_timer_fd( fd = fd_move_above_stdio(fd); - ev.events = EPOLLIN; - ev.data.ptr = d; + ev = (struct epoll_event) { + .events = EPOLLIN, + .data.ptr = d, + }; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev); if (r < 0) { @@ -2853,7 +2856,7 @@ _public_ int sd_event_set_watchdog(sd_event *e, int b) { return e->watchdog; if (b) { - struct epoll_event ev = {}; + struct epoll_event ev; r = sd_watchdog_enabled(false, &e->watchdog_period); if (r <= 0) @@ -2871,8 +2874,10 @@ _public_ int sd_event_set_watchdog(sd_event *e, int b) { if (r < 0) goto fail; - ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG); + ev = (struct epoll_event) { + .events = EPOLLIN, + .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG), + }; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev); if (r < 0) { From de05913d067767cbf6ca7c4e3554a14fd4e2bb12 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 May 2018 17:05:30 +0200 Subject: [PATCH 02/14] sd-event: use symbolic name for normal priority --- src/libsystemd/sd-event/sd-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 0de44e9124..7d6096261b 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -635,7 +635,7 @@ static int event_make_signal_data( if (e->signal_sources && e->signal_sources[sig]) priority = e->signal_sources[sig]->priority; else - priority = 0; + priority = SD_EVENT_PRIORITY_NORMAL; d = hashmap_get(e->signal_data, &priority); if (d) { From ac989a783a31df95e6c0ce2a90a8d2e1abe73592 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 May 2018 17:06:39 +0200 Subject: [PATCH 03/14] sd-event: drop pending events when we turn off/on an event source 
--- src/libsystemd/sd-event/sd-event.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 7d6096261b..3dc0cdb957 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -1638,6 +1638,13 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { if (m == SD_EVENT_OFF) { + /* Unset the pending flag when this event source is disabled */ + if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) { + r = source_set_pending(s, false); + if (r < 0) + return r; + } + switch (s->type) { case SOURCE_IO: @@ -1692,6 +1699,14 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } } else { + + /* Unset the pending flag when this event source is enabled */ + if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) { + r = source_set_pending(s, false); + if (r < 0) + return r; + } + switch (s->type) { case SOURCE_IO: From 2a0dc6cd0472fa9e061c339a7747f962879aa57f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 May 2018 17:08:40 +0200 Subject: [PATCH 04/14] sd-event: propagate errors from source_set_pending() in all cases --- src/libsystemd/sd-event/sd-event.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 3dc0cdb957..3ebd9e75df 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -1531,6 +1531,10 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) if (s->io.events == events && !(events & EPOLLET)) return 0; + r = source_set_pending(s, false); + if (r < 0) + return r; + if (s->enabled != SD_EVENT_OFF) { r = source_io_register(s, s->enabled, events); if (r < 0) @@ -1538,7 +1542,6 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) } s->io.events = events; - source_set_pending(s, false); return 0; } @@ -1800,15 +1803,18 @@ _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { struct clock_data *d; + int r; assert_return(s, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); - s->time.next = usec; + r = source_set_pending(s, false); + if (r < 0) + return r; - source_set_pending(s, false); + s->time.next = usec; d = event_get_clock_data(s->event, s->type); assert(d); @@ -1832,6 +1838,7 @@ _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *use _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { struct clock_data *d; + int r; assert_return(s, -EINVAL); assert_return(usec != (uint64_t) -1, -EINVAL); @@ -1839,13 +1846,15 @@ _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); + r = source_set_pending(s, false); + if (r < 0) + return r; + if (usec == 0) usec = DEFAULT_ACCURACY_USEC; s->time.accuracy = usec; - source_set_pending(s, false); - d = event_get_clock_data(s->event, s->type); assert(d); From cc59d290541f03118b10539211faa20ccb158c0d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 May 2018 17:09:26 +0200 Subject: [PATCH 05/14] sd-event: voidify more things --- 
src/libsystemd/sd-event/sd-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 3ebd9e75df..fd8ba14ce1 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -2342,7 +2342,7 @@ static int source_dispatch(sd_event_source *s) { /* Now, reap the PID for good. */ if (zombie) - waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED); + (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED); break; }
From 97ef5391697c34ee1c763fa9bddcd20a29ff3159 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 28 May 2018 16:26:50 +0200 Subject: [PATCH 06/14] sd-event: add new API for subscribing to inotify events
This adds a new call sd_event_add_inotify() which allows watching for inotify events on specified paths. sd-event will try to minimize the number of inotify fds allocated, and will try to add file watches to the same inotify fd objects as far as that's possible. Doing this kind of inotify object sharing should optimize behaviour in programs that watch a limited set of mostly independent files, as in most cases a single inotify object will suffice for watching all files.
Traditionally, this kind of coalescing logic (i.e. that multiple event sources are implemented on top of a single inotify object) was very hard to do, as the inotify API had serious limitations: it only allowed adding watches by path, and would implicitly merge watches installed on the same inode via different paths, without letting the caller know whether such merging took place or not.
With the advent of O_PATH this issue can be dealt with to some extent: instead of adding a path to watch to an inotify object with inotify_add_watch() right away, we can open the path with O_PATH first, call fstat() on the fd, and check the .st_dev/.st_ino fields of that against a list of watches we already have in place. If we find one, we know that inotify_add_watch() will update the watch mask of the existing watch, otherwise it will create a new watch. To make this race-free we use inotify_add_watch() on the /proc/self/fd/ path of the O_PATH fd, instead of the original path, so that the check and the watch update are guaranteed to operate on the same inode. This approach lets us deal safely with inodes that may appear under various different paths (due to symlinks, hardlinks, bind mounts, fs namespaces).
However, it's not a perfect solution: currently the kernel has no API for changing the watch mask of an existing watch -- unless you have a path or fd to the original inode. This means we can "merge" the watches of the same inode of multiple event sources correctly, but we cannot "unmerge" it again correctly in many cases, as access to the original inode might have been lost due to renames, mount/unmount, or deletions. We could in theory always keep open an O_PATH fd of the inode to watch so that we can change the mask anytime we want, but this is highly problematic, as it would consume too many fds (and in fact the scarcity of fds is the reason why watch descriptors are a separate concept from fds) and would keep the backing mounts busy (wds do not keep mounts busy, fds do).
The currently implemented approach to all this: filter in userspace and accept that the watch mask on some inode might be higher than necessary due to earlier installed event sources that might have ceased to exist.
This approach, while ugly, shouldn't be too bad for most cases, as the same inodes are probably watched for the same masks in most implementations. In order to implement priorities correctly, a separate inotify object is allocated for each priority that is used. This way we get separate per-priority event queues, of which we never dequeue more than a few events at a time.
Fixes: #3982
--- src/libsystemd/libsystemd.sym | 2 + src/libsystemd/sd-event/sd-event.c | 816 ++++++++++++++++++++++++++++- src/systemd/sd-event.h | 4 + 3 files changed, 821 insertions(+), 1 deletion(-)
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 6f2075a33a..1bc70cc1f2 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -563,4 +563,6 @@ global: sd_bus_open_system_with_description; sd_bus_slot_get_floating; sd_bus_slot_set_floating; + sd_event_add_inotify; + sd_event_source_get_inotify_mask; } LIBSYSTEMD_238;
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index fd8ba14ce1..82209e4251 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -15,6 +15,7 @@ #include "alloc-util.h" #include "fd-util.h" +#include "fs-util.h" #include "hashmap.h" #include "list.h" #include "macro.h" @@ -43,6 +44,7 @@ typedef enum EventSourceType { SOURCE_POST, SOURCE_EXIT, SOURCE_WATCHDOG, + SOURCE_INOTIFY, _SOURCE_EVENT_SOURCE_TYPE_MAX, _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1 } EventSourceType; @@ -60,6 +62,7 @@ static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { [SOURCE_POST] = "post", [SOURCE_EXIT] = "exit", [SOURCE_WATCHDOG] = "watchdog", + [SOURCE_INOTIFY] = "inotify", }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); @@ -71,12 +74,15 @@ typedef enum WakeupType { WAKEUP_EVENT_SOURCE, WAKEUP_CLOCK_DATA, WAKEUP_SIGNAL_DATA, + WAKEUP_INOTIFY_DATA, _WAKEUP_TYPE_MAX, _WAKEUP_TYPE_INVALID = -1, } WakeupType; #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM) +struct inode_data; + struct sd_event_source { WakeupType wakeup; @@ -138,6 +144,12 @@ struct sd_event_source { sd_event_handler_t callback; unsigned prioq_index; } exit; + struct { + sd_event_inotify_handler_t callback; + uint32_t mask; + struct inode_data *inode_data; + LIST_FIELDS(sd_event_source, by_inode_data); + } inotify; }; }; @@ -172,6 +184,64 @@ struct signal_data { sd_event_source *current; }; +/* A structure listing all event sources currently watching a specific inode */ +struct inode_data { + /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */ + ino_t ino; + dev_t dev; + + /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can + * rearrange the priority still until then, as we need the original inode to change the priority as we need to + * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the + * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of + * the sd-event object, so that it is efficient to close everything, before entering the next event loop + * iteration. */ + int fd; + + /* The inotify "watch descriptor" */ + int wd; + + /* The combined mask of all inotify watches on this inode we manage.
This is also the mask that has + * most recently been set on the watch descriptor. */ + uint32_t combined_mask; + + /* All event sources subscribed to this inode */ + LIST_HEAD(sd_event_source, event_sources); + + /* The inotify object we watch this inode with */ + struct inotify_data *inotify_data; + + /* A linked list of all inode data objects with fds to close (see above) */ + LIST_FIELDS(struct inode_data, to_close); +}; + +/* A structure encapsulating an inotify fd */ +struct inotify_data { + WakeupType wakeup; + + /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at + * a time */ + + int fd; + int64_t priority; + + Hashmap *inodes; /* The inode_data structures keyed by dev+ino */ + Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */ + + /* The buffer we read inotify events into */ + union inotify_event_buffer buffer; + size_t buffer_filled; /* fill level of the buffer */ + + /* How many event sources are currently marked pending for this inotify. We won't read new events off the + * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing + * the events locally if they can't be coalesced). */ + unsigned n_pending; + + /* A linked list of all inotify objects with data already read, that still need processing. We keep this list + * to make it efficient to figure out what inotify objects to process data on next. */ + LIST_FIELDS(struct inotify_data, buffered); +}; + struct sd_event { unsigned n_ref; @@ -202,6 +272,14 @@ struct sd_event { Prioq *exit; + Hashmap *inotify_data; /* indexed by priority */ + + /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */ + LIST_HEAD(struct inode_data, inode_data_to_close); + + /* A list of inotify objects that already have events buffered which aren't processed yet */ + LIST_HEAD(struct inotify_data, inotify_data_buffered); + pid_t original_pid; uint64_t iteration; @@ -231,6 +309,7 @@ struct sd_event { static thread_local sd_event *default_event = NULL; static void source_disconnect(sd_event_source *s); +static void event_gc_inode_data(sd_event *e, struct inode_data *d); static sd_event *event_resolve(sd_event *e) { return e == SD_EVENT_DEFAULT ? default_event : e; @@ -412,6 +491,8 @@ static void event_free(sd_event *e) { free(e->signal_sources); hashmap_free(e->signal_data); + hashmap_free(e->inotify_data); + hashmap_free(e->child_sources); set_free(e->post_sources); free(e); @@ -855,6 +936,41 @@ static void source_disconnect(sd_event_source *s) { prioq_remove(s->event->exit, s, &s->exit.prioq_index); break; + case SOURCE_INOTIFY: { + struct inode_data *inode_data; + + inode_data = s->inotify.inode_data; + if (inode_data) { + struct inotify_data *inotify_data; + assert_se(inotify_data = inode_data->inotify_data); + + /* Detach this event source from the inode object */ + LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s); + s->inotify.inode_data = NULL; + + if (s->pending) { + assert(inotify_data->n_pending > 0); + inotify_data->n_pending--; + } + + /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is + * continued to being watched. That's because inotify doesn't really have an API for that: we + * can only change watch masks with access to the original inode either by fd or by path. 
But + * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd + * continously and keeping the mount busy which we can't really do. We could reconstruct the + * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed + * there), but given the need for open_by_handle_at() which is privileged and not universally + * available this would be quite an incomplete solution. Hence we go the other way, leave the + * mask set, even if it is not minimized now, and ignore all events we aren't interested in + * anymore after reception. Yes, this sucks, but … Linux … */ + + /* Maybe release the inode data (and its inotify) */ + event_gc_inode_data(s->event, inode_data); + } + + break; + } + default: assert_not_reached("Wut? I shouldn't exist."); } @@ -929,6 +1045,19 @@ static int source_set_pending(sd_event_source *s, bool b) { d->current = NULL; } + if (s->type == SOURCE_INOTIFY) { + + assert(s->inotify.inode_data); + assert(s->inotify.inode_data->inotify_data); + + if (b) + s->inotify.inode_data->inotify_data->n_pending ++; + else { + assert(s->inotify.inode_data->inotify_data->n_pending > 0); + s->inotify.inode_data->inotify_data->n_pending --; + } + } + return 0; } @@ -1377,6 +1506,405 @@ _public_ int sd_event_add_exit( return 0; } +static void event_free_inotify_data(sd_event *e, struct inotify_data *d) { + assert(e); + + if (!d) + return; + + assert(hashmap_isempty(d->inodes)); + assert(hashmap_isempty(d->wd)); + + if (d->buffer_filled > 0) + LIST_REMOVE(buffered, e->inotify_data_buffered, d); + + hashmap_free(d->inodes); + hashmap_free(d->wd); + + assert_se(hashmap_remove(e->inotify_data, &d->priority) == d); + + if (d->fd >= 0) { + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m"); + + safe_close(d->fd); + } + free(d); +} + +static int event_make_inotify_data( + sd_event *e, + int64_t priority, + struct inotify_data **ret) { + + _cleanup_close_ int fd = -1; + struct inotify_data *d; + struct epoll_event ev; + int r; + + assert(e); + + d = hashmap_get(e->inotify_data, &priority); + if (d) { + if (ret) + *ret = d; + return 0; + } + + fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + + r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops); + if (r < 0) + return r; + + d = new(struct inotify_data, 1); + if (!d) + return -ENOMEM; + + *d = (struct inotify_data) { + .wakeup = WAKEUP_INOTIFY_DATA, + .fd = TAKE_FD(fd), + .priority = priority, + }; + + r = hashmap_put(e->inotify_data, &d->priority, d); + if (r < 0) { + d->fd = safe_close(d->fd); + free(d); + return r; + } + + ev = (struct epoll_event) { + .events = EPOLLIN, + .data.ptr = d, + }; + + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) { + r = -errno; + d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise + * remove the fd from the epoll first, which we don't want as we couldn't + * add it in the first place. 
*/ + event_free_inotify_data(e, d); + return r; + } + + if (ret) + *ret = d; + + return 1; +} + +static int inode_data_compare(const void *a, const void *b) { + const struct inode_data *x = a, *y = b; + + assert(x); + assert(y); + + if (x->dev < y->dev) + return -1; + if (x->dev > y->dev) + return 1; + + if (x->ino < y->ino) + return -1; + if (x->ino > y->ino) + return 1; + + return 0; +} + +static void inode_data_hash_func(const void *p, struct siphash *state) { + const struct inode_data *d = p; + + assert(p); + + siphash24_compress(&d->dev, sizeof(d->dev), state); + siphash24_compress(&d->ino, sizeof(d->ino), state); +} + +const struct hash_ops inode_data_hash_ops = { + .hash = inode_data_hash_func, + .compare = inode_data_compare +}; + +static void event_free_inode_data( + sd_event *e, + struct inode_data *d) { + + assert(e); + + if (!d) + return; + + assert(!d->event_sources); + + if (d->fd >= 0) { + LIST_REMOVE(to_close, e->inode_data_to_close, d); + safe_close(d->fd); + } + + if (d->inotify_data) { + + if (d->wd >= 0) { + if (d->inotify_data->fd >= 0) { + /* So here's a problem. At the time this runs the watch descriptor might already be + * invalidated, because an IN_IGNORED event might be queued right the moment we enter + * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very + * likely case to happen. */ + + if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL) + log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd); + } + + assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d); + } + + assert_se(hashmap_remove(d->inotify_data->inodes, d) == d); + } + + free(d); +} + +static void event_gc_inode_data( + sd_event *e, + struct inode_data *d) { + + struct inotify_data *inotify_data; + + assert(e); + + if (!d) + return; + + if (d->event_sources) + return; + + inotify_data = d->inotify_data; + event_free_inode_data(e, d); + + if (inotify_data && hashmap_isempty(inotify_data->inodes)) + event_free_inotify_data(e, inotify_data); +} + +static int event_make_inode_data( + sd_event *e, + struct inotify_data *inotify_data, + dev_t dev, + ino_t ino, + struct inode_data **ret) { + + struct inode_data *d, key; + int r; + + assert(e); + assert(inotify_data); + + key = (struct inode_data) { + .ino = ino, + .dev = dev, + }; + + d = hashmap_get(inotify_data->inodes, &key); + if (d) { + if (ret) + *ret = d; + + return 0; + } + + r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops); + if (r < 0) + return r; + + d = new(struct inode_data, 1); + if (!d) + return -ENOMEM; + + *d = (struct inode_data) { + .dev = dev, + .ino = ino, + .wd = -1, + .fd = -1, + .inotify_data = inotify_data, + }; + + r = hashmap_put(inotify_data->inodes, d, d); + if (r < 0) { + free(d); + return r; + } + + if (ret) + *ret = d; + + return 1; +} + +static uint32_t inode_data_determine_mask(struct inode_data *d) { + bool excl_unlink = true; + uint32_t combined = 0; + sd_event_source *s; + + assert(d); + + /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but + * the IN_EXCL_UNLINK flag is ANDed instead. + * + * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's + * because we cannot change the mask anymore after the event source was created once, since the kernel has no + * API for that. 
Hence we need to subscribe to the maximum mask we ever might be interested in, and supress + * events we don't care for client-side. */ + + LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) { + + if ((s->inotify.mask & IN_EXCL_UNLINK) == 0) + excl_unlink = false; + + combined |= s->inotify.mask; + } + + return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0); +} + +static int inode_data_realize_watch(sd_event *e, struct inode_data *d) { + uint32_t combined_mask; + int wd, r; + + assert(d); + assert(d->fd >= 0); + + combined_mask = inode_data_determine_mask(d); + + if (d->wd >= 0 && combined_mask == d->combined_mask) + return 0; + + r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL); + if (r < 0) + return r; + + wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask); + if (wd < 0) + return -errno; + + if (d->wd < 0) { + r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d); + if (r < 0) { + (void) inotify_rm_watch(d->inotify_data->fd, wd); + return r; + } + + d->wd = wd; + + } else if (d->wd != wd) { + + log_debug("Weird, the watch descriptor we already knew for this inode changed?"); + (void) inotify_rm_watch(d->fd, wd); + return -EINVAL; + } + + d->combined_mask = combined_mask; + return 1; +} + +_public_ int sd_event_add_inotify( + sd_event *e, + sd_event_source **ret, + const char *path, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { + + bool rm_inotify = false, rm_inode = false; + struct inotify_data *inotify_data = NULL; + struct inode_data *inode_data = NULL; + _cleanup_close_ int fd = -1; + sd_event_source *s; + struct stat st; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(path, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_pid_changed(e), -ECHILD); + + /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge + * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence + * the user can't use them for us. */ + if (mask & IN_MASK_ADD) + return -EINVAL; + + fd = open(path, O_PATH|O_CLOEXEC| + (mask & IN_ONLYDIR ? O_DIRECTORY : 0)| + (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + s = source_new(e, !ret, SOURCE_INOTIFY); + if (!s) + return -ENOMEM; + + s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON; + s->inotify.mask = mask; + s->inotify.callback = callback; + s->userdata = userdata; + + /* Allocate an inotify object for this priority, and an inode object within it */ + r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data); + if (r < 0) + goto fail; + rm_inotify = r > 0; + + r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data); + if (r < 0) + goto fail; + rm_inode = r > 0; + + /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of + * the event source, until then, for which we need the original inode. 
*/ + if (inode_data->fd < 0) { + inode_data->fd = TAKE_FD(fd); + LIST_PREPEND(to_close, e->inode_data_to_close, inode_data); + } + + /* Link our event source to the inode data object */ + LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s); + s->inotify.inode_data = inode_data; + + rm_inode = rm_inotify = false; + + /* Actually realize the watch now */ + r = inode_data_realize_watch(e, inode_data); + if (r < 0) + goto fail; + + (void) sd_event_source_set_description(s, path); + + if (ret) + *ret = s; + + return 0; + +fail: + source_free(s); + + if (rm_inode) + event_free_inode_data(e, inode_data); + + if (rm_inotify) + event_free_inotify_data(e, inotify_data); + + return r; +} + _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) { if (!s) @@ -1574,6 +2102,9 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) } _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) { + bool rm_inotify = false, rm_inode = false; + struct inotify_data *new_inotify_data = NULL; + struct inode_data *new_inode_data = NULL; int r; assert_return(s, -EINVAL); @@ -1583,7 +2114,59 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) if (s->priority == priority) return 0; - if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) { + if (s->type == SOURCE_INOTIFY) { + struct inode_data *old_inode_data; + + assert(s->inotify.inode_data); + old_inode_data = s->inotify.inode_data; + + /* We need the original fd to change the priority. If we don't have it we can't change the priority, + * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify + * events we allow priority changes only until the first following iteration. */ + if (old_inode_data->fd < 0) + return -EOPNOTSUPP; + + r = event_make_inotify_data(s->event, priority, &new_inotify_data); + if (r < 0) + return r; + rm_inotify = r > 0; + + r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data); + if (r < 0) + goto fail; + rm_inode = r > 0; + + if (new_inode_data->fd < 0) { + /* Duplicate the fd for the new inode object if we don't have any yet */ + new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3); + if (new_inode_data->fd < 0) { + r = -errno; + goto fail; + } + + LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data); + } + + /* Move the event source to the new inode data structure */ + LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s); + LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s); + s->inotify.inode_data = new_inode_data; + + /* Now create the new watch */ + r = inode_data_realize_watch(s->event, new_inode_data); + if (r < 0) { + /* Move it back */ + LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s); + LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s); + s->inotify.inode_data = old_inode_data; + goto fail; + } + + s->priority = priority; + + event_gc_inode_data(s->event, old_inode_data); + + } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) { struct signal_data *old, *d; /* Move us from the signalfd belonging to the old @@ -1613,6 +2196,15 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); return 0; + +fail: + if (rm_inode) + event_free_inode_data(s->event, new_inode_data); + + if (rm_inotify) + event_free_inotify_data(s->event, 
new_inotify_data); + + return r; } _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) { @@ -1694,6 +2286,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { case SOURCE_DEFER: case SOURCE_POST: + case SOURCE_INOTIFY: s->enabled = m; break; @@ -1774,6 +2367,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { case SOURCE_DEFER: case SOURCE_POST: + case SOURCE_INOTIFY: s->enabled = m; break; @@ -1884,6 +2478,16 @@ _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { return 0; } +_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) { + assert_return(s, -EINVAL); + assert_return(mask, -EINVAL); + assert_return(s->type == SOURCE_INOTIFY, -EDOM); + assert_return(!event_pid_changed(s->event), -ECHILD); + + *mask = s->inotify.mask; + return 0; +} + _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) { int r; @@ -2217,6 +2821,7 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) { int r; assert(e); + assert(d); assert_return(events == EPOLLIN, -EIO); /* If there's a signal queued on this priority and SIGCHLD is @@ -2273,6 +2878,160 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) { } } +static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) { + ssize_t n; + + assert(e); + assert(d); + + assert_return(revents == EPOLLIN, -EIO); + + /* If there's already an event source pending for this priority, don't read another */ + if (d->n_pending > 0) + return 0; + + /* Is the read buffer non-empty? If so, let's not read more */ + if (d->buffer_filled > 0) + return 0; + + n = read(d->fd, &d->buffer, sizeof(d->buffer)); + if (n < 0) { + if (IN_SET(errno, EAGAIN, EINTR)) + return 0; + + return -errno; + } + + assert(n > 0); + d->buffer_filled = (size_t) n; + LIST_PREPEND(buffered, e->inotify_data_buffered, d); + + return 1; +} + +static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) { + assert(e); + assert(d); + assert(sz <= d->buffer_filled); + + if (sz == 0) + return; + + /* Move the rest to the buffer to the front, in order to get things properly aligned again */ + memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz); + d->buffer_filled -= sz; + + if (d->buffer_filled == 0) + LIST_REMOVE(buffered, e->inotify_data_buffered, d); +} + +static int event_inotify_data_process(sd_event *e, struct inotify_data *d) { + int r; + + assert(e); + assert(d); + + /* If there's already an event source pending for this priority, don't read another */ + if (d->n_pending > 0) + return 0; + + while (d->buffer_filled > 0) { + size_t sz; + + /* Let's validate that the event structures are complete */ + if (d->buffer_filled < offsetof(struct inotify_event, name)) + return -EIO; + + sz = offsetof(struct inotify_event, name) + d->buffer.ev.len; + if (d->buffer_filled < sz) + return -EIO; + + if (d->buffer.ev.mask & IN_Q_OVERFLOW) { + struct inode_data *inode_data; + Iterator i; + + /* The queue overran, let's pass this event to all event sources connected to this inotify + * object */ + + HASHMAP_FOREACH(inode_data, d->inodes, i) { + sd_event_source *s; + + LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) { + + if (s->enabled == SD_EVENT_OFF) + continue; + + r = source_set_pending(s, true); + if (r < 0) + return r; + } + } + } else { + struct inode_data *inode_data; + sd_event_source *s; + + /* Find the inode object for this 
watch descriptor. If IN_IGNORED is set we also remove it from + * our watch descriptor table. */ + if (d->buffer.ev.mask & IN_IGNORED) { + + inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd)); + if (!inode_data) { + event_inotify_data_drop(e, d, sz); + continue; + } + + /* The watch descriptor was removed by the kernel, let's drop it here too */ + inode_data->wd = -1; + } else { + inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd)); + if (!inode_data) { + event_inotify_data_drop(e, d, sz); + continue; + } + } + + /* Trigger all event sources that are interested in these events. Also trigger all event + * sources if IN_IGNORED or IN_UNMOUNT is set. */ + LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) { + + if (s->enabled == SD_EVENT_OFF) + continue; + + if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 && + (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0) + continue; + + r = source_set_pending(s, true); + if (r < 0) + return r; + } + } + + /* Something pending now? If so, let's finish, otherwise let's read more. */ + if (d->n_pending > 0) + return 1; + } + + return 0; +} + +static int process_inotify(sd_event *e) { + struct inotify_data *d; + int r, done = 0; + + assert(e); + + LIST_FOREACH(buffered, d, e->inotify_data_buffered) { + r = event_inotify_data_process(e, d); + if (r < 0) + return r; + if (r > 0) + done ++; + } + + return done; +} + static int source_dispatch(sd_event_source *s) { EventSourceType saved_type; int r = 0; @@ -2359,6 +3118,28 @@ static int source_dispatch(sd_event_source *s) { r = s->exit.callback(s, s->userdata); break; + case SOURCE_INOTIFY: { + struct sd_event *e = s->event; + struct inotify_data *d; + size_t sz; + + assert(s->inotify.inode_data); + assert_se(d = s->inotify.inode_data->inotify_data); + + assert(d->buffer_filled >= offsetof(struct inotify_event, name)); + sz = offsetof(struct inotify_event, name) + d->buffer.ev.len; + assert(d->buffer_filled >= sz); + + r = s->inotify.callback(s, &d->buffer.ev, s->userdata); + + /* When no event is pending anymore on this inotify object, then let's drop the event from the + * buffer. */ + if (d->n_pending == 0) + event_inotify_data_drop(e, d, sz); + + break; + } + case SOURCE_WATCHDOG: case _SOURCE_EVENT_SOURCE_TYPE_MAX: case _SOURCE_EVENT_SOURCE_TYPE_INVALID: @@ -2493,6 +3274,25 @@ static int process_watchdog(sd_event *e) { return arm_watchdog(e); } +static void event_close_inode_data_fds(sd_event *e) { + struct inode_data *d; + + assert(e); + + /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin + * filesystems. But we can't close them right-away as we need them as long as the user still wants to make + * adjustments to the even source, such as changing the priority (which requires us to remove and readd a watch + * for the inode). Hence, let's close them when entering the first iteration after they were added, as a + * compromise. 
*/ + + while ((d = e->inode_data_to_close)) { + assert(d->fd >= 0); + d->fd = safe_close(d->fd); + + LIST_REMOVE(to_close, e->inode_data_to_close, d); + } +} + _public_ int sd_event_prepare(sd_event *e) { int r; @@ -2533,6 +3333,8 @@ _public_ int sd_event_prepare(sd_event *e) { if (r < 0) return r; + event_close_inode_data_fds(e); + if (event_next_pending(e) || e->need_process_child) goto pending; @@ -2568,6 +3370,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { ev_queue_max = MAX(e->n_sources, 1u); ev_queue = newa(struct epoll_event, ev_queue_max); + /* If we still have inotify data buffered, then query the other fds, but don't wait on it */ + if (e->inotify_data_buffered) + timeout = 0; + m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max, timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC)); if (m < 0) { @@ -2605,6 +3411,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events); break; + case WAKEUP_INOTIFY_DATA: + r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + default: assert_not_reached("Invalid wake-up pointer"); } @@ -2643,6 +3453,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { goto finish; } + r = process_inotify(e); + if (r < 0) + goto finish; + if (event_next_pending(e)) { e->state = SD_EVENT_PENDING; diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h index ec4b7bcf69..28939ca550 100644 --- a/src/systemd/sd-event.h +++ b/src/systemd/sd-event.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,7 @@ typedef int (*sd_event_child_handler_t)(sd_event_source *s, const siginfo_t *si, #else typedef void* sd_event_child_handler_t; #endif +typedef int (*sd_event_inotify_handler_t)(sd_event_source *s, const struct inotify_event *event, void *userdata); int sd_event_default(sd_event **e); @@ -89,6 +91,7 @@ int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, s int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata); int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata); int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata); +int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata); int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); @@ -139,6 +142,7 @@ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec); int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock); int sd_event_source_get_signal(sd_event_source *s); int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid); +int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret); /* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. 
*/ _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref); From 41e09d62c5c98d4c5a88230a668622df16c0a78f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 28 May 2018 17:25:14 +0200 Subject: [PATCH 07/14] sd-event: add test for the new sd_event_add_inotify() API This tests a couple of corner cases of the sd-event API including changing priorities of existing event sources, as well as overflow conditions of the inotify queue. --- src/libsystemd/sd-event/test-event.c | 150 ++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 2 deletions(-) diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index c481c94d16..db3dd6ad9d 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -9,12 +9,19 @@ #include "sd-event.h" +#include "alloc-util.h" #include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" #include "log.h" #include "macro.h" -#include "signal-util.h" -#include "util.h" +#include "parse-util.h" #include "process-util.h" +#include "rm-rf.h" +#include "signal-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "util.h" static int prepare_handler(sd_event_source *s, void *userdata) { log_info("preparing %c", PTR_TO_INT(userdata)); @@ -342,6 +349,142 @@ static void test_rtqueue(void) { sd_event_unref(e); } +#define CREATE_EVENTS_MAX (70000U) + +struct inotify_context { + bool delete_self_handler_called; + unsigned create_called[CREATE_EVENTS_MAX]; + unsigned create_overflow; + unsigned n_create_events; +}; + +static void maybe_exit(sd_event_source *s, struct inotify_context *c) { + unsigned n; + + assert(s); + assert(c); + + if (!c->delete_self_handler_called) + return; + + for (n = 0; n < 3; n++) { + unsigned i; + + if (c->create_overflow & (1U << n)) + continue; + + for (i = 0; i < c->n_create_events; i++) + if (!(c->create_called[i] & (1U << n))) + return; + } + + sd_event_exit(sd_event_source_get_event(s), 0); +} + +static int inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + struct inotify_context *c = userdata; + const char *description; + unsigned bit, n; + + assert_se(sd_event_source_get_description(s, &description) >= 0); + assert_se(safe_atou(description, &n) >= 0); + + assert_se(n <= 3); + bit = 1U << n; + + if (ev->mask & IN_Q_OVERFLOW) { + log_info("inotify-handler <%s>: overflow", description); + c->create_overflow |= bit; + } else if (ev->mask & IN_CREATE) { + unsigned i; + + log_info("inotify-handler <%s>: create on %s", description, ev->name); + + if (!streq(ev->name, "sub")) { + assert_se(safe_atou(ev->name, &i) >= 0); + + assert_se(i < c->n_create_events); + c->create_called[i] |= bit; + } + } else if (ev->mask & IN_DELETE) { + log_info("inotify-handler <%s>: delete of %s", description, ev->name); + assert_se(streq(ev->name, "sub")); + } else + assert_not_reached("unexpected inotify event"); + + maybe_exit(s, c); + return 1; +} + +static int delete_self_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + struct inotify_context *c = userdata; + + if (ev->mask & IN_Q_OVERFLOW) { + log_info("delete-self-handler: overflow"); + c->delete_self_handler_called = true; + } else if (ev->mask & IN_DELETE_SELF) { + log_info("delete-self-handler: delete-self"); + c->delete_self_handler_called = true; + } else if (ev->mask & IN_IGNORED) { + log_info("delete-self-handler: ignore"); + } else + assert_not_reached("unexpected inotify event (delete-self)"); + + maybe_exit(s, c); + return 1; +} + +static void 
test_inotify(unsigned n_create_events) { + _cleanup_(rm_rf_physical_and_freep) char *p = NULL; + sd_event_source *a = NULL, *b = NULL, *c = NULL, *d = NULL; + struct inotify_context context = { + .n_create_events = n_create_events, + }; + sd_event *e = NULL; + const char *q; + unsigned i; + + assert_se(sd_event_default(&e) >= 0); + + assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0); + + assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE|IN_ONLYDIR, inotify_handler, &context) >= 0); + assert_se(sd_event_add_inotify(e, &b, p, IN_CREATE|IN_DELETE|IN_DONT_FOLLOW, inotify_handler, &context) >= 0); + assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_IDLE) >= 0); + assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_NORMAL) >= 0); + assert_se(sd_event_add_inotify(e, &c, p, IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, inotify_handler, &context) >= 0); + assert_se(sd_event_source_set_priority(c, SD_EVENT_PRIORITY_IDLE) >= 0); + + assert_se(sd_event_source_set_description(a, "0") >= 0); + assert_se(sd_event_source_set_description(b, "1") >= 0); + assert_se(sd_event_source_set_description(c, "2") >= 0); + + q = strjoina(p, "/sub"); + assert_se(touch(q) >= 0); + assert_se(sd_event_add_inotify(e, &d, q, IN_DELETE_SELF, delete_self_handler, &context) >= 0); + + for (i = 0; i < n_create_events; i++) { + char buf[DECIMAL_STR_MAX(unsigned)+1]; + _cleanup_free_ char *z; + + xsprintf(buf, "%u", i); + assert_se(z = strjoin(p, "/", buf)); + + assert_se(touch(z) >= 0); + } + + assert_se(unlink(q) >= 0); + + assert_se(sd_event_loop(e) >= 0); + + sd_event_source_unref(a); + sd_event_source_unref(b); + sd_event_source_unref(c); + sd_event_source_unref(d); + + sd_event_unref(e); +} + int main(int argc, char *argv[]) { log_set_max_level(LOG_DEBUG); @@ -351,5 +494,8 @@ int main(int argc, char *argv[]) { test_sd_event_now(); test_rtqueue(); + test_inotify(100); /* should work without overflow */ + test_inotify(33000); /* should trigger a q overflow */ + return 0; } From 1eb54dc6457a5009e04f214e0d24ca00c782c5a5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 29 May 2018 15:59:26 +0200 Subject: [PATCH 08/14] man: document the new sd_event_add_inotify() call --- man/rules/meson.build | 4 + man/sd-event.xml | 2 + man/sd_event_add_child.xml | 1 + man/sd_event_add_defer.xml | 1 + man/sd_event_add_inotify.xml | 181 +++++++++++++++++++++++++++ man/sd_event_add_io.xml | 1 + man/sd_event_add_signal.xml | 1 + man/sd_event_add_time.xml | 1 + man/sd_event_source_set_priority.xml | 6 + 9 files changed, 198 insertions(+) create mode 100644 man/sd_event_add_inotify.xml diff --git a/man/rules/meson.build b/man/rules/meson.build index 0fb7978c21..e1e2c5e60d 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -263,6 +263,10 @@ manpages = [ '3', ['sd_event_add_exit', 'sd_event_add_post', 'sd_event_handler_t'], ''], + ['sd_event_add_inotify', + '3', + ['sd_event_inotify_handler_t', 'sd_event_source_get_inotify_mask'], + ''], ['sd_event_add_io', '3', ['sd_event_io_handler_t', diff --git a/man/sd-event.xml b/man/sd-event.xml index 59f933526a..ecaa2753a3 100644 --- a/man/sd-event.xml +++ b/man/sd-event.xml @@ -62,6 +62,7 @@ sd_event_add_time3, sd_event_add_signal3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_unref3, sd_event_source_set_priority3, @@ -152,6 +153,7 @@ sd_event_add_time3, sd_event_add_signal3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_unref3, sd_event_source_set_priority3, diff --git 
a/man/sd_event_add_child.xml b/man/sd_event_add_child.xml index d3a90ea05e..43c44ab3a3 100644 --- a/man/sd_event_add_child.xml +++ b/man/sd_event_add_child.xml @@ -223,6 +223,7 @@ sd_event_add_io3, sd_event_add_time3, sd_event_add_signal3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_set_enabled3, sd_event_source_set_priority3, diff --git a/man/sd_event_add_defer.xml b/man/sd_event_add_defer.xml index 0276443105..54570afdb5 100644 --- a/man/sd_event_add_defer.xml +++ b/man/sd_event_add_defer.xml @@ -194,6 +194,7 @@ sd_event_add_time3, sd_event_add_signal3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_source_set_enabled3, sd_event_source_set_priority3, sd_event_source_set_userdata3, diff --git a/man/sd_event_add_inotify.xml b/man/sd_event_add_inotify.xml new file mode 100644 index 0000000000..af3b6bcfdf --- /dev/null +++ b/man/sd_event_add_inotify.xml @@ -0,0 +1,181 @@ + + + + + + + + + sd_event_add_inotify + systemd + + + + sd_event_add_inotify + 3 + + + + sd_event_add_inotify + sd_event_source_get_inotify_mask + sd_event_inotify_handler_t + + Add an "inotify" file system inode event source to an event loop + + + + + #include <systemd/sd-event.h> + + typedef struct sd_event_source sd_event_source; + + + typedef int (*sd_event_inotify_handler_t) + sd_event_source *s + const struct inotify_event *event + void *userdata + + + + int sd_event_add_inotify + sd_event *event + sd_event_source **source + const char *path + uint32_t mask + sd_event_inotify_handler_t handler + void *userdata + + + + int sd_event_source_get_inotify_mask + sd_event_source *source + uint32_t *mask + + + + + + + Description + + sd_event_add_inotify() adds a new inotify7 file system inode + event source to an event loop. The event loop object is specified in the event parameter, + the event source object is returned in the source parameter. The path + parameter specifies the path of the file system inode to watch. The handler must reference a + function to call when the inode changes. The handler function will be passed the userdata + pointer, which may be chosen freely by the caller. The handler also receives a pointer to a struct + inotify_event structure containing information about the inode event. The mask + parameter specifie which types of inode events to watch specifically. It must contain an OR-ed combination of + IN_ACCESS, IN_ATTRIB, IN_CLOSE_WRITE, … flags. See + inotify7 for + further information. + + If multiple event sources are installed for the same inode the backing inotify watch descriptor is + automatically shared. The mask parameter may contain any flag defined by the inotify API, with the exception of + IN_MASK_ADD. + + The handler is enabled continuously (SD_EVENT_ON), but this may be changed with + sd_event_source_set_enabled3. Alternatively, + the IN_ONESHOT mask flag may be used to request SD_EVENT_ONESHOT mode. + If the handler function returns a negative error code, it will be disabled after the invocation, even if the + SD_EVENT_ON mode was requested before. + + + As a special limitation the priority of inotify event sources may only be altered (see + sd_event_source_set_priority3) + in the time between creation of the event source object with sd_event_add_inotify() and the + beginning of the next event loop iteration. Attempts of changing the priority any later will be refused. Consider + freeing and allocating a new inotify event source to change the priority at that point. 
+ + To destroy an event source object use + sd_event_source_unref3, but note + that the event source is only removed from the event loop when all references to the event source are dropped. To + make sure an event source does not fire anymore, even when there's still a reference to it kept, consider disabling + it with + sd_event_source_set_enabled3. + + If the second parameter of sd_event_add_inotify() is passed as NULL no reference to the + event source object is returned. In this case the event source is considered "floating", and will be destroyed + implicitly when the event loop itself is destroyed. + + sd_event_source_get_inotify_mask() retrieves the configured inotify watch mask of an + event source created previously with sd_event_add_inotify(). It takes the event source object + as the source parameter and a pointer to a uint32_t variable to return the mask + in. + + + + Return Value + + On success, these functions return 0 or a positive integer. On failure, they return a negative errno-style + error code. + + + + Errors + + Returned errors may indicate the following problems: + + + + -ENOMEM + + Not enough memory to allocate an object. + + + + -EINVAL + + An invalid argument has been passed. This includes specifying a mask with + IN_MASK_ADD set. + + + + -ESTALE + + The event loop is already terminated. + + + + + -ECHILD + + The event loop has been created in a different process. + + + + + -EDOM + + The passed event source is not an inotify process event source. + + + + + + + + + See Also + + + systemd1, + sd-event3, + sd_event_new3, + sd_event_now3, + sd_event_add_io3, + sd_event_add_time3, + sd_event_add_signal3, + sd_event_add_defer3, + sd_event_add_child3, + sd_event_source_set_enabled3, + sd_event_source_set_priority3, + sd_event_source_set_userdata3, + sd_event_source_set_description3, + waitid2 + + + + diff --git a/man/sd_event_add_io.xml b/man/sd_event_add_io.xml index 1841e3d66f..d5ef462e53 100644 --- a/man/sd_event_add_io.xml +++ b/man/sd_event_add_io.xml @@ -275,6 +275,7 @@ sd_event_add_time3, sd_event_add_signal3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_set_enabled3, sd_event_source_set_priority3, diff --git a/man/sd_event_add_signal.xml b/man/sd_event_add_signal.xml index cc132ffffc..74f0191bc4 100644 --- a/man/sd_event_add_signal.xml +++ b/man/sd_event_add_signal.xml @@ -198,6 +198,7 @@ sd_event_add_io3, sd_event_add_time3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_set_enabled3, sd_event_source_set_description3, diff --git a/man/sd_event_add_time.xml b/man/sd_event_add_time.xml index 121b64301c..9015315105 100644 --- a/man/sd_event_add_time.xml +++ b/man/sd_event_add_time.xml @@ -288,6 +288,7 @@ sd_event_add_io3, sd_event_add_signal3, sd_event_add_child3, + sd_event_add_inotify3, sd_event_add_defer3, sd_event_source_set_enabled3, sd_event_source_set_priority3, diff --git a/man/sd_event_source_set_priority.xml b/man/sd_event_source_set_priority.xml index 60fe7e3b2b..0bc6be9cb9 100644 --- a/man/sd_event_source_set_priority.xml +++ b/man/sd_event_source_set_priority.xml @@ -108,6 +108,12 @@ particular event sources do not starve or dominate the event loop. + The priority of event sources may be changed at any time of their lifetime, with the exception of inotify + event sources (i.e. those created with + sd_event_add_inotify3) whose + priority may only be changed in the time between their initial creation and the first subsequent event loop + iteration. 
+ sd_event_source_get_priority() may be used to query the current priority assigned to the event source object source. From 7feedd18fa57ba42426dfffd702dfc78ea0c6d7a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 28 May 2018 21:32:03 +0200 Subject: [PATCH 09/14] core: move destruction of old time event sources to manager_setup_time_change() It's a bit prettier that day as the function won't silently overwrite any possibly pre-initialized field, and destroy it right before we allocate a new event source. --- src/core/manager.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index f5664eaa79..3a2674ce93 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -364,6 +364,9 @@ static int manager_setup_time_change(Manager *m) { if (m->test_run_flags) return 0; + m->time_change_event_source = sd_event_source_unref(m->time_change_event_source); + m->time_change_fd = safe_close(m->time_change_fd); + /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever * CLOCK_REALTIME makes a jump relative to CLOCK_MONOTONIC */ @@ -2558,10 +2561,7 @@ static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint LOG_MESSAGE("Time has been changed")); /* Restart the watch */ - m->time_change_event_source = sd_event_source_unref(m->time_change_event_source); - m->time_change_fd = safe_close(m->time_change_fd); - - manager_setup_time_change(m); + (void) manager_setup_time_change(m); HASHMAP_FOREACH(u, m->units, i) if (UNIT_VTABLE(u)->time_change) From bbf5fd8e41b1abdf03c8ab463a2c9af7c7dc64d8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 28 May 2018 21:33:10 +0200 Subject: [PATCH 10/14] core: subscribe to /etc/localtime timezone changes and update timer elapsation accordingly Fixes: #8233 This is our first real-life usecase for the new sd_event_add_inotify() calls we just added. 
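For orientation, here is a minimal, self-contained sketch of what a consumer of the new API looks like. It is illustrative only and not part of this patch: the handler name and log output are made up, while the path and mask mirror what the manager uses below. Error handling is reduced to bare exits to keep the sketch short.

#include <stdio.h>
#include <sys/inotify.h>
#include <systemd/sd-event.h>

/* Example handler: merely log which inotify events fired for the watched inode. */
static int on_localtime_event(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        printf("/etc/localtime changed, mask=%#x\n", (unsigned) event->mask);
        return 0;
}

int main(void) {
        sd_event *e = NULL;
        sd_event_source *s = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        /* Watch for attribute changes, renames and writes, never following symlinks. */
        r = sd_event_add_inotify(e, &s, "/etc/localtime",
                                 IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW,
                                 on_localtime_event, NULL);
        if (r < 0)
                return 1;

        r = sd_event_loop(e);

        sd_event_source_unref(s);
        sd_event_unref(e);
        return r < 0 ? 1 : 0;
}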
--- src/core/manager.c | 104 +++++++++++++++++++++++++++++++++++++++++++++ src/core/manager.h | 6 +++ src/core/timer.c | 13 ++++++ src/core/unit.h | 3 ++ 4 files changed, 126 insertions(+) diff --git a/src/core/manager.c b/src/core/manager.c index 3a2674ce93..0f767d651c 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -108,6 +108,7 @@ static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); static int manager_dispatch_sigchld(sd_event_source *source, void *userdata); +static int manager_dispatch_timezone_change(sd_event_source *source, const struct inotify_event *event, void *userdata); static int manager_run_environment_generators(Manager *m); static int manager_run_generators(Manager *m); @@ -391,6 +392,65 @@ static int manager_setup_time_change(Manager *m) { return 0; } +static int manager_read_timezone_stat(Manager *m) { + struct stat st; + bool changed; + + assert(m); + + /* Read the current stat() data of /etc/localtime so that we detect changes */ + if (lstat("/etc/localtime", &st) < 0) { + log_debug_errno(errno, "Failed to stat /etc/localtime, ignoring: %m"); + changed = m->etc_localtime_accessible; + m->etc_localtime_accessible = false; + } else { + usec_t k; + + k = timespec_load(&st.st_mtim); + changed = !m->etc_localtime_accessible || k != m->etc_localtime_mtime; + + m->etc_localtime_mtime = k; + m->etc_localtime_accessible = true; + } + + return changed; +} + +static int manager_setup_timezone_change(Manager *m) { + sd_event_source *new_event = NULL; + int r; + + assert(m); + + if (m->test_run_flags != 0) + return 0; + + /* We watch /etc/localtime for three events: change of the link count (which might mean removal from /etc even + * though another link might be kept), renames, and file close operations after writing. Note we don't bother + * with IN_DELETE_SELF, as that would just report when the inode is removed entirely, i.e. after the link count + * went to zero and all fds to it are closed. + * + * Note that we never follow symlinks here. This is a simplification, but should cover almost all cases + * correctly. + * + * Note that we create the new event source first here, before releasing the old one. This should optimize + * behaviour as this way sd-event can reuse the old watch in case the inode didn't change. 
*/ + + r = sd_event_add_inotify(m->event, &new_event, "/etc/localtime", + IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW, manager_dispatch_timezone_change, m); + if (r == -ENOENT) /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created + * either by O_CREATE or by rename() */ + r = sd_event_add_inotify(m->event, &new_event, "/etc", + IN_CREATE|IN_MOVED_TO|IN_ONLYDIR, manager_dispatch_timezone_change, m); + if (r < 0) + return log_error_errno(r, "Failed to create timezone change event source: %m"); + + sd_event_source_unref(m->timezone_change_event_source); + m->timezone_change_event_source = new_event; + + return 0; +} + static int enable_special_signals(Manager *m) { _cleanup_close_ int fd = -1; @@ -775,6 +835,14 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { if (r < 0) return r; + r = manager_read_timezone_stat(m); + if (r < 0) + return r; + + r = manager_setup_timezone_change(m); + if (r < 0) + return r; + r = manager_setup_sigchld_event_source(m); if (r < 0) return r; @@ -1216,6 +1284,7 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->notify_event_source); sd_event_source_unref(m->cgroups_agent_event_source); sd_event_source_unref(m->time_change_event_source); + sd_event_source_unref(m->timezone_change_event_source); sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); sd_event_source_unref(m->user_lookup_event_source); @@ -2570,6 +2639,41 @@ static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint return 0; } +static int manager_dispatch_timezone_change( + sd_event_source *source, + const struct inotify_event *e, + void *userdata) { + + Manager *m = userdata; + int changed; + Iterator i; + Unit *u; + + assert(m); + + log_debug("inotify event for /etc/localtime"); + + changed = manager_read_timezone_stat(m); + if (changed < 0) + return changed; + if (!changed) + return 0; + + /* Something changed, restart the watch, to ensure we watch the new /etc/localtime if it changed */ + (void) manager_setup_timezone_change(m); + + /* Read the new timezone */ + tzset(); + + log_debug("Timezone has been changed (now: %s).", tzname[daylight]); + + HASHMAP_FOREACH(u, m->units, i) + if (UNIT_VTABLE(u)->timezone_change) + UNIT_VTABLE(u)->timezone_change(u); + + return 0; +} + static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; diff --git a/src/core/manager.h b/src/core/manager.h index 1f97c15365..8868e9c158 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -170,6 +170,8 @@ struct Manager { int time_change_fd; sd_event_source *time_change_event_source; + sd_event_source *timezone_change_event_source; + sd_event_source *jobs_in_progress_event_source; int user_lookup_fds[2]; @@ -250,6 +252,10 @@ struct Manager { unsigned gc_marker; + /* The stat() data the last time we saw /etc/localtime */ + usec_t etc_localtime_mtime; + bool etc_localtime_accessible:1; + /* Flags */ ManagerExitCode exit_code:5; diff --git a/src/core/timer.c b/src/core/timer.c index b2fe05d4cf..5babb6d463 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -819,6 +819,18 @@ static void timer_time_change(Unit *u) { timer_enter_waiting(t, false); } +static void timer_timezone_change(Unit *u) { + Timer *t = TIMER(u); + + assert(u); + + if (t->state != TIMER_WAITING) + return; + + log_unit_debug(u, "Timezone change, recalculating next elapse."); + timer_enter_waiting(t, false); +} + 
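To make the change-detection logic above easier to follow, here is a standalone sketch of the mtime comparison performed by manager_read_timezone_stat(). It is illustrative only: it uses plain time_t seconds instead of systemd's usec_t/timespec_load(), a static cache instead of the Manager fields, and a made-up function name.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

static time_t cached_mtime;
static bool cached_accessible;

/* Returns true if /etc/localtime changed (or disappeared) since the previous call. */
static bool localtime_changed(void) {
        struct stat st;
        bool changed;

        if (lstat("/etc/localtime", &st) < 0) {
                changed = cached_accessible;        /* the file going away counts as a change */
                cached_accessible = false;
                return changed;
        }

        changed = !cached_accessible || st.st_mtime != cached_mtime;
        cached_mtime = st.st_mtime;
        cached_accessible = true;
        return changed;
}

int main(void) {
        /* Typically reports a change on the first call, since nothing is cached yet. */
        printf("first check:  %s\n", localtime_changed() ? "changed" : "unchanged");
        printf("second check: %s\n", localtime_changed() ? "changed" : "unchanged");
        return 0;
}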
static const char* const timer_base_table[_TIMER_BASE_MAX] = { [TIMER_ACTIVE] = "OnActiveSec", [TIMER_BOOT] = "OnBootSec", @@ -868,6 +880,7 @@ const UnitVTable timer_vtable = { .reset_failed = timer_reset_failed, .time_change = timer_time_change, + .timezone_change = timer_timezone_change, .bus_vtable = bus_timer_vtable, .bus_set_property = bus_timer_set_property, diff --git a/src/core/unit.h b/src/core/unit.h index 76270a0a7c..32bdd10643 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -516,6 +516,9 @@ typedef struct UnitVTable { /* Called whenever CLOCK_REALTIME made a jump */ void (*time_change)(Unit *u); + /* Called whenever /etc/localtime was modified */ + void (*timezone_change)(Unit *u); + /* Returns the next timeout of a unit */ int (*get_timeout)(Unit *u, usec_t *timeout); From 4f811d27d6d9324e65ce628fa20ac1761ef98de0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 29 May 2018 12:55:33 +0200 Subject: [PATCH 11/14] time-util: introduce common implementation of TFD_TIMER_CANCEL_ON_SET client code We now use pretty much the same code at three places, let's unify that. --- src/basic/time-util.c | 24 ++++++++++++++++++++++++ src/basic/time-util.h | 2 ++ src/core/manager.c | 20 ++------------------ src/time-wait-sync/time-wait-sync.c | 26 ++++++++++---------------- src/timesync/timesyncd-manager.c | 12 ++---------- 5 files changed, 40 insertions(+), 44 deletions(-) diff --git a/src/basic/time-util.c b/src/basic/time-util.c index 671311e885..031b871640 100644 --- a/src/basic/time-util.c +++ b/src/basic/time-util.c @@ -1463,3 +1463,27 @@ bool in_utc_timezone(void) { return timezone == 0 && daylight == 0; } + +int time_change_fd(void) { + + /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */ + static const struct itimerspec its = { + .it_value.tv_sec = TIME_T_MAX, + }; + + _cleanup_close_ int fd; + + assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX)); + + /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to + * CLOCK_MONOTONIC. */ + + fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + if (fd < 0) + return -errno; + + if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) + return -errno; + + return TAKE_FD(fd); +} diff --git a/src/basic/time-util.h b/src/basic/time-util.h index f9c34a3e15..54aebfa36b 100644 --- a/src/basic/time-util.h +++ b/src/basic/time-util.h @@ -185,3 +185,5 @@ static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) { #else #error "Yuck, time_t is neither 4 nor 8 bytes wide?" #endif + +int time_change_fd(void); diff --git a/src/core/manager.c b/src/core/manager.c index 0f767d651c..d2f628d2bb 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -353,14 +353,7 @@ static void manager_close_idle_pipe(Manager *m) { static int manager_setup_time_change(Manager *m) { int r; - /* We only care for the cancellation event, hence we set the - * timeout to the latest possible value. 
*/ - struct itimerspec its = { - .it_value.tv_sec = TIME_T_MAX, - }; - assert(m); - assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX)); if (m->test_run_flags) return 0; @@ -368,18 +361,9 @@ static int manager_setup_time_change(Manager *m) { m->time_change_event_source = sd_event_source_unref(m->time_change_event_source); m->time_change_fd = safe_close(m->time_change_fd); - /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever - * CLOCK_REALTIME makes a jump relative to CLOCK_MONOTONIC */ - - m->time_change_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + m->time_change_fd = time_change_fd(); if (m->time_change_fd < 0) - return log_error_errno(errno, "Failed to create timerfd: %m"); - - if (timerfd_settime(m->time_change_fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) { - log_debug_errno(errno, "Failed to set up TFD_TIMER_CANCEL_ON_SET, ignoring: %m"); - m->time_change_fd = safe_close(m->time_change_fd); - return 0; - } + return log_error_errno(m->time_change_fd, "Failed to create timer change timer fd: %m"); r = sd_event_add_io(m->event, &m->time_change_event_source, m->time_change_fd, EPOLLIN, manager_dispatch_time_change_fd, m); if (r < 0) diff --git a/src/time-wait-sync/time-wait-sync.c b/src/time-wait-sync/time-wait-sync.c index 198c055650..37b032b39e 100644 --- a/src/time-wait-sync/time-wait-sync.c +++ b/src/time-wait-sync/time-wait-sync.c @@ -115,16 +115,15 @@ static int inotify_handler(sd_event_source *s, return 0; } -static int clock_state_update(ClockState *sp, - sd_event *event) { - static const struct itimerspec its = { - .it_value.tv_sec = TIME_T_MAX, - }; - int r; - struct timex tx = {}; +static int clock_state_update( + ClockState *sp, + sd_event *event) { + char buf[MAX((size_t)FORMAT_TIMESTAMP_MAX, STRLEN("unrepresentable"))]; - usec_t t; + struct timex tx = {}; const char * ts; + usec_t t; + int r; clock_state_release_timerfd(sp); @@ -151,19 +150,14 @@ static int clock_state_update(ClockState *sp, * it synchronized. When an NTP source is selected it sets the clock again with clock_adjtime(2) which marks it * synchronized and also touches /run/systemd/timesync/synchronized which covers the case when the clock wasn't * "set". 
*/ - r = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK | TFD_CLOEXEC); + + r = time_change_fd(); if (r < 0) { - log_error_errno(errno, "Failed to create timerfd: %m"); + log_error_errno(r, "Failed to create timerfd: %m"); goto finish; } sp->timerfd_fd = r; - r = timerfd_settime(sp->timerfd_fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET, &its, NULL); - if (r < 0) { - log_error_errno(errno, "Failed to set timerfd conditions: %m"); - goto finish; - } - r = adjtimex(&tx); if (r < 0) { log_error_errno(errno, "Failed to read adjtimex state: %m"); diff --git a/src/timesync/timesyncd-manager.c b/src/timesync/timesyncd-manager.c index f76f07e655..46036c41f7 100644 --- a/src/timesync/timesyncd-manager.c +++ b/src/timesync/timesyncd-manager.c @@ -224,11 +224,6 @@ static int manager_clock_watch(sd_event_source *source, int fd, uint32_t revents /* wake up when the system time changes underneath us */ static int manager_clock_watch_setup(Manager *m) { - - struct itimerspec its = { - .it_value.tv_sec = TIME_T_MAX - }; - int r; assert(m); @@ -236,12 +231,9 @@ static int manager_clock_watch_setup(Manager *m) { m->event_clock_watch = sd_event_source_unref(m->event_clock_watch); safe_close(m->clock_watch_fd); - m->clock_watch_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + m->clock_watch_fd = time_change_fd(); if (m->clock_watch_fd < 0) - return log_error_errno(errno, "Failed to create timerfd: %m"); - - if (timerfd_settime(m->clock_watch_fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0) - return log_error_errno(errno, "Failed to set up timerfd: %m"); + return log_error_errno(m->clock_watch_fd, "Failed to create timerfd: %m"); r = sd_event_add_io(m->event, &m->event_clock_watch, m->clock_watch_fd, EPOLLIN, manager_clock_watch, m); if (r < 0) From a5cc7e5ac109eab0ded6d8ed265bd7944fc8aa10 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 29 May 2018 16:26:24 +0200 Subject: [PATCH 12/14] core: schedule time and timezone change events a bit before .timer elapsation events We really should make sure that .timer units are dispatched while taking the newest time/timezone data into account. 
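Before the diff for this patch, a self-contained sketch that ties the two preceding changes together: it creates a TFD_TIMER_CANCEL_ON_SET timerfd by hand (the technique that time_change_fd() wraps) and registers it with sd-event at a priority just below SD_EVENT_PRIORITY_NORMAL, as the hunks below do. It is illustrative only; the handler name and the exit-after-first-event behaviour are assumptions, and TIME_T_MAX is replaced by a portable stand-in.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <systemd/sd-event.h>

static int on_clock_change(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        uint64_t count;

        /* When CLOCK_REALTIME jumps relative to CLOCK_MONOTONIC, the read() fails with
         * ECANCELED; that failure is exactly the notification we are after. */
        if (read(fd, &count, sizeof(count)) < 0 && errno == ECANCELED)
                printf("CLOCK_REALTIME was changed\n");

        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        /* Arm the timer as far in the future as representable; we only care about the
         * cancellation event, never about a real expiry (systemd uses TIME_T_MAX here). */
        struct itimerspec its = {
                .it_value.tv_sec = sizeof(time_t) == 8 ? (time_t) INT64_MAX : (time_t) INT32_MAX,
        };
        sd_event *e = NULL;
        sd_event_source *src = NULL;
        int fd, r;

        fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return 1;
        if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0)
                return 1;

        if (sd_event_default(&e) < 0)
                return 1;
        if (sd_event_add_io(e, &src, fd, EPOLLIN, on_clock_change, NULL) < 0)
                return 1;

        /* Dispatch clock changes slightly earlier than normal-priority (e.g. .timer) sources. */
        if (sd_event_source_set_priority(src, SD_EVENT_PRIORITY_NORMAL - 1) < 0)
                return 1;

        r = sd_event_loop(e);

        sd_event_source_unref(src);
        sd_event_unref(e);
        close(fd);
        return r < 0;
}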
--- src/core/manager.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index d2f628d2bb..7fc31ce569 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -369,6 +369,11 @@ static int manager_setup_time_change(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to create time change event source: %m"); + /* Schedule this slightly earlier than the .timer event sources */ + r = sd_event_source_set_priority(m->time_change_event_source, SD_EVENT_PRIORITY_NORMAL-1); + if (r < 0) + return log_error_errno(r, "Failed to set priority of time change event sources: %m"); + (void) sd_event_source_set_description(m->time_change_event_source, "manager-time-change"); log_debug("Set up TFD_TIMER_CANCEL_ON_SET timerfd."); @@ -401,7 +406,7 @@ static int manager_read_timezone_stat(Manager *m) { } static int manager_setup_timezone_change(Manager *m) { - sd_event_source *new_event = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *new_event = NULL; int r; assert(m); @@ -429,8 +434,13 @@ static int manager_setup_timezone_change(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to create timezone change event source: %m"); + /* Schedule this slightly earlier than the .timer event sources */ + r = sd_event_source_set_priority(new_event, SD_EVENT_PRIORITY_NORMAL-1); + if (r < 0) + return log_error_errno(r, "Failed to set priority of timezone change event sources: %m"); + sd_event_source_unref(m->timezone_change_event_source); - m->timezone_change_event_source = new_event; + m->timezone_change_event_source = TAKE_PTR(new_event); return 0; } From d08eb1fabdf701a6482a971787a9680d4d81ced5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 6 Jun 2018 10:49:27 +0200 Subject: [PATCH 13/14] sd-event: use structure initialization instead of new0() where possible --- src/libsystemd/sd-event/sd-event.c | 55 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 82209e4251..1b983dbe37 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -504,16 +504,32 @@ _public_ int sd_event_new(sd_event** ret) { assert_return(ret, -EINVAL); - e = new0(sd_event, 1); + e = new(sd_event, 1); if (!e) return -ENOMEM; - e->n_ref = 1; - e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; - e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY; - e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA; - e->original_pid = getpid_cached(); - e->perturb = USEC_INFINITY; + *e = (sd_event) { + .n_ref = 1, + .epoll_fd = -1, + .watchdog_fd = -1, + .realtime.wakeup = WAKEUP_CLOCK_DATA, + .realtime.fd = -1, + .realtime.next = USEC_INFINITY, + .boottime.wakeup = WAKEUP_CLOCK_DATA, + .boottime.fd = -1, + .boottime.next = USEC_INFINITY, + .monotonic.wakeup = WAKEUP_CLOCK_DATA, + .monotonic.fd = -1, + .monotonic.next = USEC_INFINITY, + .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA, + .realtime_alarm.fd = -1, + .realtime_alarm.next = USEC_INFINITY, + .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA, + .boottime_alarm.fd = -1, + .boottime_alarm.next = USEC_INFINITY, + .perturb = USEC_INFINITY, + .original_pid = getpid_cached(), + }; r = prioq_ensure_allocated(&e->pending, 
pending_prioq_compare); if (r < 0) @@ -730,13 +746,15 @@ static int event_make_signal_data( if (r < 0) return r; - d = new0(struct signal_data, 1); + d = new(struct signal_data, 1); if (!d) return -ENOMEM; - d->wakeup = WAKEUP_SIGNAL_DATA; - d->fd = -1; - d->priority = priority; + *d = (struct signal_data) { + .wakeup = WAKEUP_SIGNAL_DATA, + .fd = -1, + .priority = priority, + }; r = hashmap_put(e->signal_data, &d->priority, d); if (r < 0) { @@ -1066,15 +1084,18 @@ static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType t assert(e); - s = new0(sd_event_source, 1); + s = new(sd_event_source, 1); if (!s) return NULL; - s->n_ref = 1; - s->event = e; - s->floating = floating; - s->type = type; - s->pending_index = s->prepare_index = PRIOQ_IDX_NULL; + *s = (struct sd_event_source) { + .n_ref = 1, + .event = e, + .floating = floating, + .type = type, + .pending_index = PRIOQ_IDX_NULL, + .prepare_index = PRIOQ_IDX_NULL, + }; if (!floating) sd_event_ref(e); From cd710e6ab9ce173cca22db7f1bc3a939c202308f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 29 May 2018 16:20:46 +0200 Subject: [PATCH 14/14] update TODO --- TODO | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/TODO b/TODO index d6d9901e64..ee944505ef 100644 --- a/TODO +++ b/TODO @@ -24,16 +24,13 @@ Janitorial Clean-ups: Features: +* Add OnTimezoneChange= and OnTimeChange= stanzas to .timer units in order to + schedule events based on time and timezone changes. + * add O_TMPFILE support to copy_file_atomic() * nspawn: greater control over selinux label? -* sd-event: implement inotify events, as we can safely and robustly do that now - for any inode without fearing confusion by inodes appearing at multiple - places: we can open it with O_PATH first, then store its inode in a hash - table, to recognize duplicate watches before creating (and thus corrupting - pre-existing ones) them, and using /proc/self/fd/ to add it right after. - * the error paths in usbffs_dispatch_ep() leak memory * cgroups: figure out if we can somehow communicate in a cleaner way whether a