Systemd/src/systemd/sd-event.h
Lennart Poettering 97ef539169 sd-event: add new API for subscribing to inotify events
This adds a new call sd_event_add_inotify() which allows watching for
inotify events on specified paths.

sd-event will try to minimize the number of inotify fds allocated, and
will try to add file watches to the same inotify fd objects as far as
that's possible. Doing this kind of inotify object should optimize
behaviour in programs that watch a limited set of mostly independent
files as in most cases a single inotify object will suffice for watching
all files.

Traditionally, this kind of coalescing logic (i.e. that multiple event
sources are implemented on top of a single inotify object) was very hard
to do, as the inotify API had serious limitations: it only allowed
adding watches by path, and would implicitly merge watches installed on
the same node via different path, without letting the caller know about
whether such merging took place or not.

With the advent of O_PATH this issue can be dealt with to some point:
instead of adding a path to watch to an inotify object with
inotify_add_watch() right away, we can open the path with O_PATH first,
call fstat() on the fd, and check the .st_dev/.st_ino fields of that
against a list of watches we already have in place. If we find one we
know that the inotify_add_watch() will update the watch mask of the
existing watch, otherwise it will create a new watch. To make this
race-free we use inotify_add_watch() on the /proc/self/fd/ path of the
O_PATH fd, instead of the original path, so that we do the checking and
watch updating with guaranteed the same inode.

This approach let's us deal safely with inodes that may appear under
various different paths (due to symlinks, hardlinks, bind mounts, fs
namespaces). However it's not a perfect solution: currently the kernel
has no API for changing the watch mask of an existing watch -- unless
you have a path or fd to the original inode. This means we can "merge"
the watches of the same inode of multiple event sources correctly, but
we cannot "unmerge" it again correctly in many cases, as access to the
original inode might have been lost, due to renames, mount/unmount, or
deletions. We could in theory always keep open an O_PATH fd of the inode
to watch so that we can change the mask anytime we want, but this is
highly problematics, as it would consume too many fds (and in fact the
scarcity of fds is the reason why watch descriptors are a separate
concepts from fds) and would keep the backing mounts busy (wds do not
keep mounts busy, fds do). The current implemented approach to all this:
filter in userspace and accept that the watch mask on some inode might
be higher than necessary due to earlier installed event sources that
might have ceased to exist. This approach while ugly shouldn't be too
bad for most cases as the same inodes are probably wacthed for the same
masks in most implementations.

In order to implement priorities correctly a seperate inotify object is
allocated for each priority that is used. This way we get separate
per-priority event queues, of which we never dequeue more than a few
events at a time.

Fixes: #3982
2018-06-06 10:53:56 +02:00

154 lines
6.3 KiB
C

/* SPDX-License-Identifier: LGPL-2.1+ */
#ifndef foosdeventhfoo
#define foosdeventhfoo
/***
This file is part of systemd.
Copyright 2013 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <inttypes.h>
#include <signal.h>
#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/signalfd.h>
#include <sys/types.h>
#include <time.h>
#include "_sd-common.h"
/*
Why is this better than pure epoll?
- Supports event source prioritization
- Scales better with a large number of time events because it does not require one timerfd each
- Automatically tries to coalesce timer events system-wide
- Handles signals and child PIDs
*/
_SD_BEGIN_DECLARATIONS;
#define SD_EVENT_DEFAULT ((sd_event *) 1)
typedef struct sd_event sd_event;
typedef struct sd_event_source sd_event_source;
enum {
SD_EVENT_OFF = 0,
SD_EVENT_ON = 1,
SD_EVENT_ONESHOT = -1
};
enum {
SD_EVENT_INITIAL,
SD_EVENT_ARMED,
SD_EVENT_PENDING,
SD_EVENT_RUNNING,
SD_EVENT_EXITING,
SD_EVENT_FINISHED,
SD_EVENT_PREPARING
};
enum {
/* And everything in-between and outside is good too */
SD_EVENT_PRIORITY_IMPORTANT = -100,
SD_EVENT_PRIORITY_NORMAL = 0,
SD_EVENT_PRIORITY_IDLE = 100
};
typedef int (*sd_event_handler_t)(sd_event_source *s, void *userdata);
typedef int (*sd_event_io_handler_t)(sd_event_source *s, int fd, uint32_t revents, void *userdata);
typedef int (*sd_event_time_handler_t)(sd_event_source *s, uint64_t usec, void *userdata);
typedef int (*sd_event_signal_handler_t)(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata);
#if defined _GNU_SOURCE || _POSIX_C_SOURCE >= 199309L
typedef int (*sd_event_child_handler_t)(sd_event_source *s, const siginfo_t *si, void *userdata);
#else
typedef void* sd_event_child_handler_t;
#endif
typedef int (*sd_event_inotify_handler_t)(sd_event_source *s, const struct inotify_event *event, void *userdata);
int sd_event_default(sd_event **e);
int sd_event_new(sd_event **e);
sd_event* sd_event_ref(sd_event *e);
sd_event* sd_event_unref(sd_event *e);
int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, sd_event_io_handler_t callback, void *userdata);
int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
int sd_event_prepare(sd_event *e);
int sd_event_wait(sd_event *e, uint64_t usec);
int sd_event_dispatch(sd_event *e);
int sd_event_run(sd_event *e, uint64_t usec);
int sd_event_loop(sd_event *e);
int sd_event_exit(sd_event *e, int code);
int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec);
int sd_event_get_fd(sd_event *e);
int sd_event_get_state(sd_event *e);
int sd_event_get_tid(sd_event *e, pid_t *tid);
int sd_event_get_exit_code(sd_event *e, int *code);
int sd_event_set_watchdog(sd_event *e, int b);
int sd_event_get_watchdog(sd_event *e);
int sd_event_get_iteration(sd_event *e, uint64_t *ret);
sd_event_source* sd_event_source_ref(sd_event_source *s);
sd_event_source* sd_event_source_unref(sd_event_source *s);
sd_event *sd_event_source_get_event(sd_event_source *s);
void* sd_event_source_get_userdata(sd_event_source *s);
void* sd_event_source_set_userdata(sd_event_source *s, void *userdata);
int sd_event_source_set_description(sd_event_source *s, const char *description);
int sd_event_source_get_description(sd_event_source *s, const char **description);
int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback);
int sd_event_source_get_pending(sd_event_source *s);
int sd_event_source_get_priority(sd_event_source *s, int64_t *priority);
int sd_event_source_set_priority(sd_event_source *s, int64_t priority);
int sd_event_source_get_enabled(sd_event_source *s, int *enabled);
int sd_event_source_set_enabled(sd_event_source *s, int enabled);
int sd_event_source_get_io_fd(sd_event_source *s);
int sd_event_source_set_io_fd(sd_event_source *s, int fd);
int sd_event_source_get_io_fd_own(sd_event_source *s);
int sd_event_source_set_io_fd_own(sd_event_source *s, int own);
int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events);
int sd_event_source_set_io_events(sd_event_source *s, uint32_t events);
int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents);
int sd_event_source_get_time(sd_event_source *s, uint64_t *usec);
int sd_event_source_set_time(sd_event_source *s, uint64_t usec);
int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec);
int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec);
int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
int sd_event_source_get_signal(sd_event_source *s);
int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
/* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */
_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref);
_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_unref);
_SD_END_DECLARATIONS;
#endif