diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
index 6f2075a33a..1bc70cc1f2 100644
--- a/src/libsystemd/libsystemd.sym
+++ b/src/libsystemd/libsystemd.sym
@@ -563,4 +563,6 @@ global:
         sd_bus_open_system_with_description;
         sd_bus_slot_get_floating;
         sd_bus_slot_set_floating;
+        sd_event_add_inotify;
+        sd_event_source_get_inotify_mask;
 } LIBSYSTEMD_238;
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index fd8ba14ce1..82209e4251 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -15,6 +15,7 @@
 #include "alloc-util.h"
 #include "fd-util.h"
+#include "fs-util.h"
 #include "hashmap.h"
 #include "list.h"
 #include "macro.h"
@@ -43,6 +44,7 @@ typedef enum EventSourceType {
         SOURCE_POST,
         SOURCE_EXIT,
         SOURCE_WATCHDOG,
+        SOURCE_INOTIFY,
         _SOURCE_EVENT_SOURCE_TYPE_MAX,
         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
 } EventSourceType;
@@ -60,6 +62,7 @@ static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX]
         [SOURCE_POST] = "post",
         [SOURCE_EXIT] = "exit",
         [SOURCE_WATCHDOG] = "watchdog",
+        [SOURCE_INOTIFY] = "inotify",
 };
 
 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
@@ -71,12 +74,15 @@ typedef enum WakeupType {
         WAKEUP_EVENT_SOURCE,
         WAKEUP_CLOCK_DATA,
         WAKEUP_SIGNAL_DATA,
+        WAKEUP_INOTIFY_DATA,
         _WAKEUP_TYPE_MAX,
         _WAKEUP_TYPE_INVALID = -1,
 } WakeupType;
 
 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
 
+struct inode_data;
+
 struct sd_event_source {
         WakeupType wakeup;
@@ -138,6 +144,12 @@ struct sd_event_source {
                         sd_event_handler_t callback;
                         unsigned prioq_index;
                 } exit;
+                struct {
+                        sd_event_inotify_handler_t callback;
+                        uint32_t mask;
+                        struct inode_data *inode_data;
+                        LIST_FIELDS(sd_event_source, by_inode_data);
+                } inotify;
         };
 };
@@ -172,6 +184,64 @@ struct signal_data {
         sd_event_source *current;
 };
 
+/* A structure listing all event sources currently watching a specific inode */
+struct inode_data {
+        /* The identifier for the inode: the combination of the .st_dev + .st_ino fields of the file */
+        ino_t ino;
+        dev_t dev;
+
+        /* An fd of the watched inode. The fd is kept open until the next iteration of the event loop, so that the
+         * priority can still be rearranged until then: moving a watch to the inotify fd of a different priority
+         * requires a handle to the original inode. We keep a list of all inode_data objects with an open fd in the
+         * to_close list (see below) of the sd-event object, so that it is efficient to close them all before entering
+         * the next event loop iteration. */
+        int fd;
+
+        /* The inotify "watch descriptor" */
+        int wd;
+
+        /* The combination of the masks of all inotify watches on this inode we manage. This is also the mask that has
+         * most recently been set on the watch descriptor. */
+        uint32_t combined_mask;
+
+        /* All event sources subscribed to this inode */
+        LIST_HEAD(sd_event_source, event_sources);
+
+        /* The inotify object we watch this inode with */
+        struct inotify_data *inotify_data;
+
+        /* A linked list of all inode data objects with fds to close (see above) */
+        LIST_FIELDS(struct inode_data, to_close);
+};
+
+/* A structure encapsulating an inotify fd */
+struct inotify_data {
+        WakeupType wakeup;
+
+        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority
+         * at a time */
+
+        int fd;
+        int64_t priority;
+
+        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
+        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */
+
+        /* The buffer we read inotify events into */
+        union inotify_event_buffer buffer;
+        size_t buffer_filled; /* fill level of the buffer */
+
+        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
+         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
+         * the events locally if they can't be coalesced). */
+        unsigned n_pending;
+
+        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
+         * to make it efficient to figure out what inotify objects to process data on next. */
+        LIST_FIELDS(struct inotify_data, buffered);
+};
+
 struct sd_event {
         unsigned n_ref;
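The read buffer above uses union inotify_event_buffer, which this patch does not define; it comes from fs-util.h, newly included at the top of sd-event.c. For orientation, a sketch of its shape (the authoritative definition lives in src/basic/fs-util.h and may differ in detail):

#include <limits.h>
#include <stdint.h>
#include <sys/inotify.h>

/* Sketch only: sized so that a single read() always returns whole events,
 * even one carrying the longest possible name component. */
#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)

union inotify_event_buffer {
        struct inotify_event ev;
        uint8_t raw[INOTIFY_EVENT_MAX];
};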
@@ -202,6 +272,14 @@ struct sd_event {
 
         Prioq *exit;
 
+        Hashmap *inotify_data; /* indexed by priority */
+
+        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
+        LIST_HEAD(struct inode_data, inode_data_to_close);
+
+        /* A list of inotify objects that already have events buffered which aren't processed yet */
+        LIST_HEAD(struct inotify_data, inotify_data_buffered);
+
         pid_t original_pid;
 
         uint64_t iteration;
@@ -231,6 +309,7 @@ struct sd_event {
 static thread_local sd_event *default_event = NULL;
 
 static void source_disconnect(sd_event_source *s);
+static void event_gc_inode_data(sd_event *e, struct inode_data *d);
 
 static sd_event *event_resolve(sd_event *e) {
         return e == SD_EVENT_DEFAULT ? default_event : e;
 }
@@ -412,6 +491,8 @@ static void event_free(sd_event *e) {
         free(e->signal_sources);
         hashmap_free(e->signal_data);
 
+        hashmap_free(e->inotify_data);
+
         hashmap_free(e->child_sources);
         set_free(e->post_sources);
         free(e);
@@ -855,6 +936,41 @@ static void source_disconnect(sd_event_source *s) {
                 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                 break;
 
+        case SOURCE_INOTIFY: {
+                struct inode_data *inode_data;
+
+                inode_data = s->inotify.inode_data;
+                if (inode_data) {
+                        struct inotify_data *inotify_data;
+                        assert_se(inotify_data = inode_data->inotify_data);
+
+                        /* Detach this event source from the inode object */
+                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
+                        s->inotify.inode_data = NULL;
+
+                        if (s->pending) {
+                                assert(inotify_data->n_pending > 0);
+                                inotify_data->n_pending--;
+                        }
+
+                        /* Note that we don't reduce the inotify mask of the watch descriptor here if the inode
+                         * continues to be watched. That's because inotify doesn't really have an API for that: we can
+                         * only change watch masks with access to the original inode, either by fd or by path. But
+                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
+                         * continuously and keeping the mount busy, which we can't really do. We could reconstruct the
+                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
+                         * there), but given that open_by_handle_at() is privileged and not universally available this
+                         * would be quite an incomplete solution. Hence we go the other way: leave the mask set, even
+                         * if it is not minimized now, and ignore all events we aren't interested in anymore after
+                         * reception. Yes, this sucks, but … Linux … */
+
+                        /* Maybe release the inode data (and its inotify) */
+                        event_gc_inode_data(s->event, inode_data);
+                }
+
+                break;
+        }
+
         default:
                 assert_not_reached("Wut? I shouldn't exist.");
         }
@@ -929,6 +1045,19 @@ static int source_set_pending(sd_event_source *s, bool b) {
                 d->current = NULL;
         }
 
+        if (s->type == SOURCE_INOTIFY) {
+
+                assert(s->inotify.inode_data);
+                assert(s->inotify.inode_data->inotify_data);
+
+                if (b)
+                        s->inotify.inode_data->inotify_data->n_pending++;
+                else {
+                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
+                        s->inotify.inode_data->inotify_data->n_pending--;
+                }
+        }
+
         return 0;
 }
@@ -1377,6 +1506,405 @@ _public_ int sd_event_add_exit(
         return 0;
 }
 
+static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
+        assert(e);
+
+        if (!d)
+                return;
+
+        assert(hashmap_isempty(d->inodes));
+        assert(hashmap_isempty(d->wd));
+
+        if (d->buffer_filled > 0)
+                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+
+        hashmap_free(d->inodes);
+        hashmap_free(d->wd);
+
+        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
+
+        if (d->fd >= 0) {
+                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
+
+                safe_close(d->fd);
+        }
+        free(d);
+}
+
+static int event_make_inotify_data(
+                sd_event *e,
+                int64_t priority,
+                struct inotify_data **ret) {
+
+        _cleanup_close_ int fd = -1;
+        struct inotify_data *d;
+        struct epoll_event ev;
+        int r;
+
+        assert(e);
+
+        d = hashmap_get(e->inotify_data, &priority);
+        if (d) {
+                if (ret)
+                        *ret = d;
+                return 0;
+        }
+
+        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        fd = fd_move_above_stdio(fd);
+
+        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
+        if (r < 0)
+                return r;
+
+        d = new(struct inotify_data, 1);
+        if (!d)
+                return -ENOMEM;
+
+        *d = (struct inotify_data) {
+                .wakeup = WAKEUP_INOTIFY_DATA,
+                .fd = TAKE_FD(fd),
+                .priority = priority,
+        };
+
+        r = hashmap_put(e->inotify_data, &d->priority, d);
+        if (r < 0) {
+                d->fd = safe_close(d->fd);
+                free(d);
+                return r;
+        }
+
+        ev = (struct epoll_event) {
+                .events = EPOLLIN,
+                .data.ptr = d,
+        };
+
+        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+                r = -errno;
+                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
+                                            * remove the fd from the epoll first, which we don't want as we couldn't
+                                            * add it in the first place. */
+                event_free_inotify_data(e, d);
+                return r;
+        }
+
+        if (ret)
+                *ret = d;
+
+        return 1;
+}
+
+static int inode_data_compare(const void *a, const void *b) {
+        const struct inode_data *x = a, *y = b;
+
+        assert(x);
+        assert(y);
+
+        if (x->dev < y->dev)
+                return -1;
+        if (x->dev > y->dev)
+                return 1;
+
+        if (x->ino < y->ino)
+                return -1;
+        if (x->ino > y->ino)
+                return 1;
+
+        return 0;
+}
+
+static void inode_data_hash_func(const void *p, struct siphash *state) {
+        const struct inode_data *d = p;
+
+        assert(p);
+
+        siphash24_compress(&d->dev, sizeof(d->dev), state);
+        siphash24_compress(&d->ino, sizeof(d->ino), state);
+}
+
+static const struct hash_ops inode_data_hash_ops = {
+        .hash = inode_data_hash_func,
+        .compare = inode_data_compare
+};
+
+static void event_free_inode_data(
+                sd_event *e,
+                struct inode_data *d) {
+
+        assert(e);
+
+        if (!d)
+                return;
+
+        assert(!d->event_sources);
+
+        if (d->fd >= 0) {
+                LIST_REMOVE(to_close, e->inode_data_to_close, d);
+                safe_close(d->fd);
+        }
+
+        if (d->inotify_data) {
+
+                if (d->wd >= 0) {
+                        if (d->inotify_data->fd >= 0) {
+                                /* So here's a problem. At the time this runs the watch descriptor might already be
+                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we
+                                 * enter the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a
+                                 * very likely case to happen. */
+
+                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
+                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
+                        }
+
+                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
+                }
+
+                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
+        }
+
+        free(d);
+}
+
+static void event_gc_inode_data(
+                sd_event *e,
+                struct inode_data *d) {
+
+        struct inotify_data *inotify_data;
+
+        assert(e);
+
+        if (!d)
+                return;
+
+        if (d->event_sources)
+                return;
+
+        inotify_data = d->inotify_data;
+        event_free_inode_data(e, d);
+
+        if (inotify_data && hashmap_isempty(inotify_data->inodes))
+                event_free_inotify_data(e, inotify_data);
+}
+
+static int event_make_inode_data(
+                sd_event *e,
+                struct inotify_data *inotify_data,
+                dev_t dev,
+                ino_t ino,
+                struct inode_data **ret) {
+
+        struct inode_data *d, key;
+        int r;
+
+        assert(e);
+        assert(inotify_data);
+
+        key = (struct inode_data) {
+                .ino = ino,
+                .dev = dev,
+        };
+
+        d = hashmap_get(inotify_data->inodes, &key);
+        if (d) {
+                if (ret)
+                        *ret = d;
+
+                return 0;
+        }
+
+        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
+        if (r < 0)
+                return r;
+
+        d = new(struct inode_data, 1);
+        if (!d)
+                return -ENOMEM;
+
+        *d = (struct inode_data) {
+                .dev = dev,
+                .ino = ino,
+                .wd = -1,
+                .fd = -1,
+                .inotify_data = inotify_data,
+        };
+
+        r = hashmap_put(inotify_data->inodes, d, d);
+        if (r < 0) {
+                free(d);
+                return r;
+        }
+
+        if (ret)
+                *ret = d;
+
+        return 1;
+}
+
+static uint32_t inode_data_determine_mask(struct inode_data *d) {
+        bool excl_unlink = true;
+        uint32_t combined = 0;
+        sd_event_source *s;
+
+        assert(d);
+
+        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
+         * the IN_EXCL_UNLINK flag is ANDed instead.
+         *
+         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
+         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
+         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
+         * suppress events we don't care for client-side. */
+
+        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
+
+                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
+                        excl_unlink = false;
+
+                combined |= s->inotify.mask;
+        }
+
+        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
+}
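To make the coalescing rule concrete, here is a self-contained sketch (hypothetical masks, not part of the patch) of what inode_data_determine_mask() yields when two sources watch the same inode:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/inotify.h>

int main(void) {
        uint32_t a = IN_CREATE | IN_EXCL_UNLINK;  /* source A's mask */
        uint32_t b = IN_DELETE | IN_ONESHOT;      /* source B's mask */

        /* OR the masks together, strip the per-source flags, AND IN_EXCL_UNLINK */
        uint32_t combined = a | b;
        bool excl_unlink = (a & IN_EXCL_UNLINK) && (b & IN_EXCL_UNLINK);

        combined &= ~(uint32_t) (IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK);
        if (excl_unlink)
                combined |= IN_EXCL_UNLINK;

        /* IN_ONESHOT stays client-side; IN_EXCL_UNLINK is dropped because
         * source B did not request it. */
        assert(combined == (IN_CREATE|IN_DELETE));
        return 0;
}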
+
+static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
+        uint32_t combined_mask;
+        int wd, r;
+
+        assert(d);
+        assert(d->fd >= 0);
+
+        combined_mask = inode_data_determine_mask(d);
+
+        if (d->wd >= 0 && combined_mask == d->combined_mask)
+                return 0;
+
+        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
+        if (r < 0)
+                return r;
+
+        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
+        if (wd < 0)
+                return -errno;
+
+        if (d->wd < 0) {
+                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
+                if (r < 0) {
+                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
+                        return r;
+                }
+
+                d->wd = wd;
+
+        } else if (d->wd != wd) {
+
+                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
+                (void) inotify_rm_watch(d->inotify_data->fd, wd);
+                return -EINVAL;
+        }
+
+        d->combined_mask = combined_mask;
+        return 1;
+}
+
+_public_ int sd_event_add_inotify(
+                sd_event *e,
+                sd_event_source **ret,
+                const char *path,
+                uint32_t mask,
+                sd_event_inotify_handler_t callback,
+                void *userdata) {
+
+        bool rm_inotify = false, rm_inode = false;
+        struct inotify_data *inotify_data = NULL;
+        struct inode_data *inode_data = NULL;
+        _cleanup_close_ int fd = -1;
+        sd_event_source *s;
+        struct stat st;
+        int r;
+
+        assert_return(e, -EINVAL);
+        assert_return(e = event_resolve(e), -ENOPKG);
+        assert_return(path, -EINVAL);
+        assert_return(callback, -EINVAL);
+        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+        assert_return(!event_pid_changed(e), -ECHILD);
+
+        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
+         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
+         * callers can't use it themselves. */
+        if (mask & IN_MASK_ADD)
+                return -EINVAL;
+
+        fd = open(path, O_PATH|O_CLOEXEC|
+                        (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
+                        (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
+        if (fd < 0)
+                return -errno;
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        s = source_new(e, !ret, SOURCE_INOTIFY);
+        if (!s)
+                return -ENOMEM;
+
+        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
+        s->inotify.mask = mask;
+        s->inotify.callback = callback;
+        s->userdata = userdata;
+
+        /* Allocate an inotify object for this priority, and an inode object within it */
+        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
+        if (r < 0)
+                goto fail;
+        rm_inotify = r > 0;
+
+        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
+        if (r < 0)
+                goto fail;
+        rm_inode = r > 0;
+
+        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
+         * of the event source until then, as that requires access to the original inode. */
+        if (inode_data->fd < 0) {
+                inode_data->fd = TAKE_FD(fd);
+                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
+        }
+
+        /* Link our event source to the inode data object */
+        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
+        s->inotify.inode_data = inode_data;
+
+        rm_inode = rm_inotify = false;
+
+        /* Actually realize the watch now */
+        r = inode_data_realize_watch(e, inode_data);
+        if (r < 0)
+                goto fail;
+
+        (void) sd_event_source_set_description(s, path);
+
+        if (ret)
+                *ret = s;
+
+        return 0;
+
+fail:
+        source_free(s);
+
+        if (rm_inode)
+                event_free_inode_data(e, inode_data);
+
+        if (rm_inotify)
+                event_free_inotify_data(e, inotify_data);
+
+        return r;
+}
+
 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
 
         if (!s)
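For context, this is roughly how a client would use the new call; a minimal sketch (hypothetical program, not part of the patch):

#include <stdio.h>
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        (void) s; (void) userdata;
        printf("mask=%#x name=%s\n", (unsigned) ev->mask, ev->len > 0 ? ev->name : "");
        return 0;
}

int main(void) {
        sd_event *e = NULL;
        sd_event_source *src = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        /* Watch a directory for files appearing and disappearing */
        r = sd_event_add_inotify(e, &src, "/tmp", IN_CREATE|IN_DELETE, on_change, NULL);
        if (r < 0)
                return 1;

        r = sd_event_loop(e);

        sd_event_source_unref(src);
        sd_event_unref(e);
        return r < 0;
}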
@@ -1574,6 +2102,9 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority
 }
 
 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+        bool rm_inotify = false, rm_inode = false;
+        struct inotify_data *new_inotify_data = NULL;
+        struct inode_data *new_inode_data = NULL;
         int r;
 
         assert_return(s, -EINVAL);
@@ -1583,7 +2114,59 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
         if (s->priority == priority)
                 return 0;
 
-        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
+        if (s->type == SOURCE_INOTIFY) {
+                struct inode_data *old_inode_data;
+
+                assert(s->inotify.inode_data);
+                old_inode_data = s->inotify.inode_data;
+
+                /* We need the original fd to change the priority. If we don't have it we can't change the priority
+                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
+                 * events we allow priority changes only until the first following iteration. */
+                if (old_inode_data->fd < 0)
+                        return -EOPNOTSUPP;
+
+                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
+                if (r < 0)
+                        return r;
+                rm_inotify = r > 0;
+
+                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
+                if (r < 0)
+                        goto fail;
+                rm_inode = r > 0;
+
+                if (new_inode_data->fd < 0) {
+                        /* Duplicate the fd for the new inode object if we don't have any yet */
+                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
+                        if (new_inode_data->fd < 0) {
+                                r = -errno;
+                                goto fail;
+                        }
+
+                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
+                }
+
+                /* Move the event source to the new inode data structure */
+                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
+                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
+                s->inotify.inode_data = new_inode_data;
+
+                /* Now create the new watch */
+                r = inode_data_realize_watch(s->event, new_inode_data);
+                if (r < 0) {
+                        /* Move it back */
+                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
+                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
+                        s->inotify.inode_data = old_inode_data;
+                        goto fail;
+                }
+
+                s->priority = priority;
+
+                event_gc_inode_data(s->event, old_inode_data);
+
+        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                 struct signal_data *old, *d;
 
                 /* Move us from the signalfd belonging to the old
@@ -1613,6 +2196,15 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
 
         return 0;
+
+fail:
+        if (rm_inode)
+                event_free_inode_data(s->event, new_inode_data);
+
+        if (rm_inotify)
+                event_free_inotify_data(s->event, new_inotify_data);
+
+        return r;
 }
 
 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
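A practical consequence of the fd-stashing scheme: the priority of an inotify source can only be changed until the first event-loop iteration after it was created; once the stashed O_PATH fd has been closed, sd_event_source_set_priority() returns -EOPNOTSUPP for it. A sketch of a hypothetical helper (not part of the patch) that sets the priority while that is still possible:

#include <stdint.h>
#include <systemd/sd-event.h>

/* Hypothetical convenience wrapper, not part of the patch. */
static int add_idle_inotify_watch(sd_event *e, sd_event_source **ret, const char *path,
                                  uint32_t mask, sd_event_inotify_handler_t handler, void *userdata) {
        sd_event_source *src = NULL;
        int r;

        r = sd_event_add_inotify(e, &src, path, mask, handler, userdata);
        if (r < 0)
                return r;

        /* Still possible here: the loop has not iterated since the source
         * was created, hence the O_PATH fd is still stashed. */
        r = sd_event_source_set_priority(src, SD_EVENT_PRIORITY_IDLE);
        if (r < 0) {
                sd_event_source_unref(src);
                return r;
        }

        *ret = src;
        return 0;
}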
@@ -1694,6 +2286,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
                 case SOURCE_DEFER:
                 case SOURCE_POST:
+                case SOURCE_INOTIFY:
                         s->enabled = m;
                         break;
@@ -1774,6 +2367,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
                 case SOURCE_DEFER:
                 case SOURCE_POST:
+                case SOURCE_INOTIFY:
                         s->enabled = m;
                         break;
@@ -1884,6 +2478,16 @@ _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
         return 0;
 }
 
+_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
+        assert_return(s, -EINVAL);
+        assert_return(mask, -EINVAL);
+        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
+        assert_return(!event_pid_changed(s->event), -ECHILD);
+
+        *mask = s->inotify.mask;
+        return 0;
+}
+
 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
         int r;
@@ -2217,6 +2821,7 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
         int r;
 
         assert(e);
+        assert(d);
 
         assert_return(events == EPOLLIN, -EIO);
 
         /* If there's a signal queued on this priority and SIGCHLD is
@@ -2273,6 +2878,160 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
         }
 }
 
+static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
+        ssize_t n;
+
+        assert(e);
+        assert(d);
+
+        assert_return(revents == EPOLLIN, -EIO);
+
+        /* If there's already an event source pending for this priority, don't read another */
+        if (d->n_pending > 0)
+                return 0;
+
+        /* Is the read buffer non-empty? If so, let's not read more */
+        if (d->buffer_filled > 0)
+                return 0;
+
+        n = read(d->fd, &d->buffer, sizeof(d->buffer));
+        if (n < 0) {
+                if (IN_SET(errno, EAGAIN, EINTR))
+                        return 0;
+
+                return -errno;
+        }
+
+        assert(n > 0);
+        d->buffer_filled = (size_t) n;
+        LIST_PREPEND(buffered, e->inotify_data_buffered, d);
+
+        return 1;
+}
+
+static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
+        assert(e);
+        assert(d);
+        assert(sz <= d->buffer_filled);
+
+        if (sz == 0)
+                return;
+
+        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
+        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
+        d->buffer_filled -= sz;
+
+        if (d->buffer_filled == 0)
+                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+}
+
+static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
+        int r;
+
+        assert(e);
+        assert(d);
+
+        /* If there's already an event source pending for this priority, don't read another */
+        if (d->n_pending > 0)
+                return 0;
+
+        while (d->buffer_filled > 0) {
+                size_t sz;
+
+                /* Let's validate that the event structures are complete */
+                if (d->buffer_filled < offsetof(struct inotify_event, name))
+                        return -EIO;
+
+                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+                if (d->buffer_filled < sz)
+                        return -EIO;
+
+                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
+                        struct inode_data *inode_data;
+                        Iterator i;
+
+                        /* The queue overran, let's pass this event to all event sources connected to this inotify
+                         * object */
+
+                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
+                                sd_event_source *s;
+
+                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+                                        if (s->enabled == SD_EVENT_OFF)
+                                                continue;
+
+                                        r = source_set_pending(s, true);
+                                        if (r < 0)
+                                                return r;
+                                }
+                        }
+                } else {
+                        struct inode_data *inode_data;
+                        sd_event_source *s;
+
+                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it
+                         * from our watch descriptor table. */
+                        if (d->buffer.ev.mask & IN_IGNORED) {
+
+                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+                                if (!inode_data) {
+                                        event_inotify_data_drop(e, d, sz);
+                                        continue;
+                                }
+
+                                /* The watch descriptor was removed by the kernel, let's drop it here too */
+                                inode_data->wd = -1;
+                        } else {
+                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+                                if (!inode_data) {
+                                        event_inotify_data_drop(e, d, sz);
+                                        continue;
+                                }
+                        }
+
+                        /* Trigger all event sources that are interested in these events. Also trigger all event
+                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
+                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+                                if (s->enabled == SD_EVENT_OFF)
+                                        continue;
+
+                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
+                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
+                                        continue;
+
+                                r = source_set_pending(s, true);
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+
+                /* Something pending now? If so, let's finish, otherwise let's read more. */
+                if (d->n_pending > 0)
+                        return 1;
+        }
+
+        return 0;
+}
+
+static int process_inotify(sd_event *e) {
+        struct inotify_data *d;
+        int r, done = 0;
+
+        assert(e);
+
+        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
+                r = event_inotify_data_process(e, d);
+                if (r < 0)
+                        return r;
+                if (r > 0)
+                        done++;
+        }
+
+        return done;
+}
+
 static int source_dispatch(sd_event_source *s) {
         EventSourceType saved_type;
         int r = 0;
@@ -2359,6 +3118,28 @@ static int source_dispatch(sd_event_source *s) {
                 r = s->exit.callback(s, s->userdata);
                 break;
 
+        case SOURCE_INOTIFY: {
+                struct sd_event *e = s->event;
+                struct inotify_data *d;
+                size_t sz;
+
+                assert(s->inotify.inode_data);
+                assert_se(d = s->inotify.inode_data->inotify_data);
+
+                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
+                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+                assert(d->buffer_filled >= sz);
+
+                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
+
+                /* When no event is pending anymore on this inotify object, then let's drop the event from the
+                 * buffer. */
+                if (d->n_pending == 0)
+                        event_inotify_data_drop(e, d, sz);
+
+                break;
+        }
+
         case SOURCE_WATCHDOG:
         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
@@ -2493,6 +3274,25 @@ static int process_watchdog(sd_event *e) {
         return arm_watchdog(e);
 }
 
+static void event_close_inode_data_fds(sd_event *e) {
+        struct inode_data *d;
+
+        assert(e);
+
+        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
+         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
+         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
+         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
+         * a compromise. */
+
+        while ((d = e->inode_data_to_close)) {
+                assert(d->fd >= 0);
+                d->fd = safe_close(d->fd);
+
+                LIST_REMOVE(to_close, e->inode_data_to_close, d);
+        }
+}
+
 _public_ int sd_event_prepare(sd_event *e) {
         int r;
@@ -2533,6 +3333,8 @@ _public_ int sd_event_prepare(sd_event *e) {
         if (r < 0)
                 return r;
 
+        event_close_inode_data_fds(e);
+
         if (event_next_pending(e) || e->need_process_child)
                 goto pending;
@@ -2568,6 +3370,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
         ev_queue_max = MAX(e->n_sources, 1u);
         ev_queue = newa(struct epoll_event, ev_queue_max);
 
+        /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
+        if (e->inotify_data_buffered)
+                timeout = 0;
+
         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
 
         if (m < 0) {
@@ -2605,6 +3411,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
                         r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                         break;
 
+                case WAKEUP_INOTIFY_DATA:
+                        r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
+                        break;
+
                 default:
                         assert_not_reached("Invalid wake-up pointer");
                 }
@@ -2643,6 +3453,10 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
                 goto finish;
         }
 
+        r = process_inotify(e);
+        if (r < 0)
+                goto finish;
+
         if (event_next_pending(e)) {
                 e->state = SD_EVENT_PENDING;
diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h
index ec4b7bcf69..28939ca550 100644
--- a/src/systemd/sd-event.h
+++ b/src/systemd/sd-event.h
@@ -24,6 +24,7 @@
 #include <inttypes.h>
 #include <sys/epoll.h>
 #include <sys/signalfd.h>
+#include <sys/inotify.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <time.h>
@@ -78,6 +79,7 @@ typedef int (*sd_event_child_handler_t)(sd_event_source *s, const siginfo_t *si
 #else
 typedef void* sd_event_child_handler_t;
 #endif
+typedef int (*sd_event_inotify_handler_t)(sd_event_source *s, const struct inotify_event *event, void *userdata);
 
 int sd_event_default(sd_event **e);
@@ -89,6 +91,7 @@ int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, s
 int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
 int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
 int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
+int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
 int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
 int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
 int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
@@ -139,6 +142,7 @@ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec);
 int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
 int sd_event_source_get_signal(sd_event_source *s);
 int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
+int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
 
 /* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */
 _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref);
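Since the dispatch logic above forwards IN_IGNORED and IN_UNMOUNT to every source on the inode regardless of the subscribed mask, and delivers IN_Q_OVERFLOW to all sources on the whole inotify object, callbacks should expect events outside their mask. A sketch of a defensive handler (hypothetical, not part of the patch):

#include <stdio.h>
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        (void) s; (void) userdata;

        if (ev->mask & IN_Q_OVERFLOW) {
                /* The kernel queue overran and events were lost; rescan
                 * whatever state the watch was tracking. */
                fprintf(stderr, "inotify queue overflow, rescanning\n");
                return 0;
        }

        if (ev->mask & (IN_IGNORED|IN_UNMOUNT))
                /* The watch is gone: inode deleted or file system unmounted */
                return 0;

        /* Regular event; ev->name is only non-empty for directory watches */
        printf("mask=%#x %s\n", (unsigned) ev->mask, ev->len > 0 ? ev->name : "");
        return 0;
}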