sd-event: add pidfd support
This adds support for watching for process exits via Linux's new pidfd concept. This makes watching processes and killing them race-free if properly used, fixing a long-standing UNIX misdesign. This patch adds implicit and explicit pidfd support to sd-event: if a process shall be watched and is specified by PID, we will now internally create a pidfd for it and use that, if available. Alternatively, a new constructor for child process event sources is added that takes a pidfd as input. Besides mere watching of child processes via pidfd, two additional features are added: → sd_event_source_send_child_signal() allows sending a signal to the process being watched in the safest way possible (wrapping the new pidfd_send_signal() syscall). → sd_event_source_set_child_process_own() allows marking a watched process for destruction as soon as the event source is freed. This is currently implemented in userspace, but hopefully will become a kernel feature eventually. Altogether this means an sd_event_source object is now a safe and stable concept for referencing processes in a race-free way, with automatic fallback on pre-pidfd kernels. Note that this patch adds support for this only to sd-event, not to PID 1. That's because PID 1 needs to use waitid(P_ALL) for reaping any process that might get reparented to it. This currently conflicts semantically with using pidfds for watching processes, since P_ALL is undirected and thus might reap a process before the pidfd signals that the process has ended, which is hard to handle. The kernel will likely gain a concept for excluding specific pidfds from P_ALL watching; as soon as that is around, we can start making use of this in PID 1 too.
This commit is contained in:
parent
298f466f15
commit
f8f3f9263e
|
@ -682,3 +682,14 @@ global:
|
||||||
sd_bus_object_vtable_format;
|
sd_bus_object_vtable_format;
|
||||||
sd_event_source_disable_unref;
|
sd_event_source_disable_unref;
|
||||||
} LIBSYSTEMD_241;
|
} LIBSYSTEMD_241;
|
||||||
|
|
||||||
|
LIBSYSTEMD_245 {
|
||||||
|
global:
|
||||||
|
sd_event_add_child_pidfd;
|
||||||
|
sd_event_source_get_child_pidfd;
|
||||||
|
sd_event_source_get_child_pidfd_own;
|
||||||
|
sd_event_source_set_child_pidfd_own;
|
||||||
|
sd_event_source_get_child_process_own;
|
||||||
|
sd_event_source_set_child_process_own;
|
||||||
|
sd_event_source_send_child_signal;
|
||||||
|
} LIBSYSTEMD_243;
|
||||||
|
|
|
@ -34,7 +34,7 @@ typedef enum EventSourceType {
|
||||||
* we know how to dispatch it */
|
* we know how to dispatch it */
|
||||||
typedef enum WakeupType {
|
typedef enum WakeupType {
|
||||||
WAKEUP_NONE,
|
WAKEUP_NONE,
|
||||||
WAKEUP_EVENT_SOURCE,
|
WAKEUP_EVENT_SOURCE, /* either I/O or pidfd wakeup */
|
||||||
WAKEUP_CLOCK_DATA,
|
WAKEUP_CLOCK_DATA,
|
||||||
WAKEUP_SIGNAL_DATA,
|
WAKEUP_SIGNAL_DATA,
|
||||||
WAKEUP_INOTIFY_DATA,
|
WAKEUP_INOTIFY_DATA,
|
||||||
|
@ -96,6 +96,12 @@ struct sd_event_source {
|
||||||
siginfo_t siginfo;
|
siginfo_t siginfo;
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
int options;
|
int options;
|
||||||
|
int pidfd;
|
||||||
|
bool registered:1; /* whether the pidfd is registered in the epoll */
|
||||||
|
bool pidfd_owned:1; /* close pidfd when event source is freed */
|
||||||
|
bool process_owned:1; /* kill+reap process when event source is freed */
|
||||||
|
bool exited:1; /* true if process exited (i.e. if there's no value in SIGKILLing it anymore if we want to get rid of it) */
|
||||||
|
bool waited:1; /* true if process was waited for (i.e. if there's no value in waitid(P_PID)'ing it anymore if we want to get rid of it) */
|
||||||
} child;
|
} child;
|
||||||
struct {
|
struct {
|
||||||
sd_event_handler_t callback;
|
sd_event_handler_t callback;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "sd-id128.h"
|
#include "sd-id128.h"
|
||||||
|
|
||||||
#include "alloc-util.h"
|
#include "alloc-util.h"
|
||||||
|
#include "env-util.h"
|
||||||
#include "event-source.h"
|
#include "event-source.h"
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
#include "fs-util.h"
|
#include "fs-util.h"
|
||||||
|
@ -28,6 +29,14 @@
|
||||||
|
|
||||||
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
|
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
|
||||||
|
|
||||||
|
/* Tells whether the given event source is a child (PID) event source that can be implemented by watching
 * its pidfd for EPOLLIN. That is the case only if a pidfd is set (>= 0) and the wait options are exactly
 * WEXITED. A NULL source yields false. */
static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        if (!s)
                return false;

        if (s->type != SOURCE_CHILD)
                return false;

        if (s->child.pidfd < 0)
                return false;

        return s->child.options == WEXITED;
}
|
||||||
|
|
||||||
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
|
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
|
||||||
[SOURCE_IO] = "io",
|
[SOURCE_IO] = "io",
|
||||||
[SOURCE_TIME_REALTIME] = "realtime",
|
[SOURCE_TIME_REALTIME] = "realtime",
|
||||||
|
@ -401,6 +410,51 @@ static int source_io_register(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Drops the epoll registration of a child event source's pidfd.
 *
 * Does nothing if event_pid_changed() reports true for the owning event loop, or if the source is not
 * currently marked as registered. A failing EPOLL_CTL_DEL is only logged at debug level; the "registered"
 * flag is cleared in any case. */
static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event) || !s->child.registered)
                return;

        /* Only sources watched via pidfd have an entry in the epoll set that needs removing */
        if (EVENT_SOURCE_WATCH_PIDFD(s) &&
            epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}
|
||||||
|
|
||||||
|
/* Registers (or re-registers) the pidfd of a child event source with the event loop's epoll instance.
 *
 * s:       the child event source
 * enabled: the enablement mode to register for; must not be SD_EVENT_OFF. SD_EVENT_ONESHOT adds
 *          EPOLLONESHOT to the EPOLLIN event mask.
 *
 * If the source is not watched via pidfd (see EVENT_SOURCE_WATCH_PIDFD()) no epoll call is made, but the
 * source is still marked as registered. Returns 0 on success, or a negative errno if epoll_ctl() failed. */
static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };
                /* Modify the existing entry if there is one already, otherwise add a new one */
                int op = s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;

                if (epoll_ctl(s->event->epoll_fd, op, s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}
|
||||||
|
|
||||||
static clockid_t event_source_type_to_clock(EventSourceType t) {
|
static clockid_t event_source_type_to_clock(EventSourceType t) {
|
||||||
|
|
||||||
switch (t) {
|
switch (t) {
|
||||||
|
@ -611,9 +665,8 @@ static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig)
|
||||||
|
|
||||||
assert(e);
|
assert(e);
|
||||||
|
|
||||||
/* Rechecks if the specified signal is still something we are
|
/* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
|
||||||
* interested in. If not, we'll unmask it, and possibly drop
|
* and possibly drop the signalfd for it. */
|
||||||
* the signalfd for it. */
|
|
||||||
|
|
||||||
if (sig == SIGCHLD &&
|
if (sig == SIGCHLD &&
|
||||||
e->n_enabled_child_sources > 0)
|
e->n_enabled_child_sources > 0)
|
||||||
|
@ -704,9 +757,13 @@ static void source_disconnect(sd_event_source *s) {
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
|
(void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
|
||||||
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s))
|
||||||
|
source_child_pidfd_unregister(s);
|
||||||
|
else
|
||||||
|
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SOURCE_DEFER:
|
case SOURCE_DEFER:
|
||||||
|
@ -787,6 +844,44 @@ static void source_free(sd_event_source *s) {
|
||||||
if (s->type == SOURCE_IO && s->io.owned)
|
if (s->type == SOURCE_IO && s->io.owned)
|
||||||
s->io.fd = safe_close(s->io.fd);
|
s->io.fd = safe_close(s->io.fd);
|
||||||
|
|
||||||
|
if (s->type == SOURCE_CHILD) {
|
||||||
|
/* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
|
||||||
|
|
||||||
|
if (s->child.process_owned) {
|
||||||
|
|
||||||
|
if (!s->child.exited) {
|
||||||
|
bool sent = false;
|
||||||
|
|
||||||
|
if (s->child.pidfd >= 0) {
|
||||||
|
if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
|
||||||
|
if (errno == ESRCH) /* Already dead */
|
||||||
|
sent = true;
|
||||||
|
else if (!ERRNO_IS_NOT_SUPPORTED(errno))
|
||||||
|
log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
|
||||||
|
s->child.pid);
|
||||||
|
} else
|
||||||
|
sent = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sent)
|
||||||
|
if (kill(s->child.pid, SIGKILL) < 0)
|
||||||
|
if (errno != ESRCH) /* Already dead */
|
||||||
|
log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
|
||||||
|
s->child.pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!s->child.waited) {
|
||||||
|
siginfo_t si = {};
|
||||||
|
|
||||||
|
/* Reap the child if we can */
|
||||||
|
(void) waitid(P_PID, s->child.pid, &si, WEXITED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->child.pidfd_owned)
|
||||||
|
s->child.pidfd = safe_close(s->child.pidfd);
|
||||||
|
}
|
||||||
|
|
||||||
if (s->destroy_callback)
|
if (s->destroy_callback)
|
||||||
s->destroy_callback(s->userdata);
|
s->destroy_callback(s->userdata);
|
||||||
|
|
||||||
|
@ -1121,6 +1216,11 @@ _public_ int sd_event_add_signal(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Decides whether pidfds shall be used at all. Mostly relevant for debugging, i.e. this is used in
 * test-event.c to test the event loop once with and once without pidfd.
 *
 * Only a result of exactly 0 from getenv_bool_secure("SYSTEMD_PIDFD") turns pidfd use off; every other
 * result (presumably including the error returned when the variable is unset; confirm against
 * getenv_bool_secure()) leaves it on. */
static bool shall_use_pidfd(void) {
        int r;

        r = getenv_bool_secure("SYSTEMD_PIDFD");
        if (r == 0)
                return false;

        return true;
}
|
||||||
|
|
||||||
_public_ int sd_event_add_child(
|
_public_ int sd_event_add_child(
|
||||||
sd_event *e,
|
sd_event *e,
|
||||||
sd_event_source **ret,
|
sd_event_source **ret,
|
||||||
|
@ -1152,18 +1252,123 @@ _public_ int sd_event_add_child(
|
||||||
if (!s)
|
if (!s)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
s->wakeup = WAKEUP_EVENT_SOURCE;
|
||||||
s->child.pid = pid;
|
s->child.pid = pid;
|
||||||
s->child.options = options;
|
s->child.options = options;
|
||||||
s->child.callback = callback;
|
s->child.callback = callback;
|
||||||
s->userdata = userdata;
|
s->userdata = userdata;
|
||||||
s->enabled = SD_EVENT_ONESHOT;
|
s->enabled = SD_EVENT_ONESHOT;
|
||||||
|
|
||||||
|
/* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
|
||||||
|
* pin the PID, and make regular waitid() handling race-free. */
|
||||||
|
|
||||||
|
if (shall_use_pidfd()) {
|
||||||
|
s->child.pidfd = pidfd_open(s->child.pid, 0);
|
||||||
|
if (s->child.pidfd < 0) {
|
||||||
|
/* Propagate errors unless the syscall is not supported or blocked */
|
||||||
|
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||||
|
return -errno;
|
||||||
|
} else
|
||||||
|
s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
|
||||||
|
} else
|
||||||
|
s->child.pidfd = -1;
|
||||||
|
|
||||||
|
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
e->n_enabled_child_sources++;
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||||
|
/* We have a pidfd and we only want to watch for exit */
|
||||||
|
|
||||||
|
r = source_child_pidfd_register(s, s->enabled);
|
||||||
|
if (r < 0) {
|
||||||
|
e->n_enabled_child_sources--;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* We have no pidfd or we shall wait for some other event than WEXITED */
|
||||||
|
|
||||||
|
r = event_make_signal_data(e, SIGCHLD, NULL);
|
||||||
|
if (r < 0) {
|
||||||
|
e->n_enabled_child_sources--;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
e->need_process_child = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
*ret = s;
|
||||||
|
|
||||||
|
TAKE_PTR(s);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
_public_ int sd_event_add_child_pidfd(
|
||||||
|
sd_event *e,
|
||||||
|
sd_event_source **ret,
|
||||||
|
int pidfd,
|
||||||
|
int options,
|
||||||
|
sd_event_child_handler_t callback,
|
||||||
|
void *userdata) {
|
||||||
|
|
||||||
|
|
||||||
|
_cleanup_(source_freep) sd_event_source *s = NULL;
|
||||||
|
pid_t pid;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert_return(e, -EINVAL);
|
||||||
|
assert_return(e = event_resolve(e), -ENOPKG);
|
||||||
|
assert_return(pidfd >= 0, -EBADF);
|
||||||
|
assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
|
||||||
|
assert_return(options != 0, -EINVAL);
|
||||||
|
assert_return(callback, -EINVAL);
|
||||||
|
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
|
||||||
|
assert_return(!event_pid_changed(e), -ECHILD);
|
||||||
|
|
||||||
|
r = hashmap_ensure_allocated(&e->child_sources, NULL);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = pidfd_get_pid(pidfd, &pid);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
|
||||||
|
return -EBUSY;
|
||||||
|
|
||||||
|
s = source_new(e, !ret, SOURCE_CHILD);
|
||||||
|
if (!s)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
s->wakeup = WAKEUP_EVENT_SOURCE;
|
||||||
|
s->child.pidfd = pidfd;
|
||||||
|
s->child.pid = pid;
|
||||||
|
s->child.options = options;
|
||||||
|
s->child.callback = callback;
|
||||||
|
s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
|
||||||
|
s->userdata = userdata;
|
||||||
|
s->enabled = SD_EVENT_ONESHOT;
|
||||||
|
|
||||||
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
|
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
e->n_enabled_child_sources++;
|
e->n_enabled_child_sources++;
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||||
|
/* We only want to watch for WEXITED */
|
||||||
|
|
||||||
|
r = source_child_pidfd_register(s, s->enabled);
|
||||||
|
if (r < 0) {
|
||||||
|
e->n_enabled_child_sources--;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* We shall wait for some other event than WEXITED */
|
||||||
|
|
||||||
r = event_make_signal_data(e, SIGCHLD, NULL);
|
r = event_make_signal_data(e, SIGCHLD, NULL);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
e->n_enabled_child_sources--;
|
e->n_enabled_child_sources--;
|
||||||
|
@ -1171,11 +1376,12 @@ _public_ int sd_event_add_child(
|
||||||
}
|
}
|
||||||
|
|
||||||
e->need_process_child = true;
|
e->need_process_child = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
*ret = s;
|
*ret = s;
|
||||||
TAKE_PTR(s);
|
|
||||||
|
|
||||||
|
TAKE_PTR(s);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2023,7 +2229,11 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
||||||
assert(s->event->n_enabled_child_sources > 0);
|
assert(s->event->n_enabled_child_sources > 0);
|
||||||
s->event->n_enabled_child_sources--;
|
s->event->n_enabled_child_sources--;
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s))
|
||||||
|
source_child_pidfd_unregister(s);
|
||||||
|
else
|
||||||
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SOURCE_EXIT:
|
case SOURCE_EXIT:
|
||||||
|
@ -2097,6 +2307,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
||||||
|
|
||||||
s->enabled = m;
|
s->enabled = m;
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||||
|
/* yes, we have pidfd */
|
||||||
|
|
||||||
|
r = source_child_pidfd_register(s, s->enabled);
|
||||||
|
if (r < 0) {
|
||||||
|
s->enabled = SD_EVENT_OFF;
|
||||||
|
s->event->n_enabled_child_sources--;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* no pidfd, or something other to watch for than WEXITED */
|
||||||
|
|
||||||
r = event_make_signal_data(s->event, SIGCHLD, NULL);
|
r = event_make_signal_data(s->event, SIGCHLD, NULL);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
s->enabled = SD_EVENT_OFF;
|
s->enabled = SD_EVENT_OFF;
|
||||||
|
@ -2104,6 +2326,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
||||||
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
event_gc_signal_data(s->event, &s->priority, SIGCHLD);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -2225,6 +2448,98 @@ _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the pidfd associated with a child event source.
 *
 * Returns the file descriptor (>= 0) on success. Fails with -EINVAL if s is NULL, -EDOM if s is not a
 * child event source, -ECHILD if event_pid_changed() reports true for the owning event loop, and
 * -EOPNOTSUPP if the source has no pidfd. */
_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->child.pidfd >= 0 ? s->child.pidfd : -EOPNOTSUPP;
}
|
||||||
|
|
||||||
|
/* Sends a signal to the process watched by a child event source, via the pidfd if there is one, and via
 * the PID otherwise.
 *
 * s:     the child event source
 * sig:   the signal to send; must satisfy SIGNAL_VALID()
 * si:    optional siginfo_t to queue along with the signal, or NULL
 * flags: passed on to pidfd_send_signal(); must be 0 if the PID-based fallback ends up being used
 *
 * Returns 0 on success. Fails with -EINVAL on a NULL source or invalid signal, -EDOM if s is not a child
 * event source, -ECHILD if event_pid_changed() reports true for the owning event loop, -ESRCH if the
 * process is already known to have exited, -EOPNOTSUPP if flags are non-zero but the PID-based fallback
 * has to be used, or the negative errno of the failing system call. */
_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we already have seen indication the process exited refuse sending a signal early. This way we
         * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                /* Pass the caller's flags through, so that they are validated/honoured by the kernel
                 * instead of being silently dropped. */
                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, flags) < 0) {
                        /* Propagate the error, unless the system call is not implemented or prohibited,
                         * in which case we fall back to the PID-based calls below. */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}
|
||||||
|
|
||||||
|
/* Reports whether the child event source owns its pidfd, i.e. whether the pidfd is closed when the event
 * source is freed.
 *
 * Returns 1 if owned, 0 if not. Fails with -EINVAL if s is NULL, -EDOM if s is not a child event source,
 * and -EOPNOTSUPP if the source has no pidfd. */
_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.pidfd >= 0 ? (int) s->child.pidfd_owned : -EOPNOTSUPP;
}
|
||||||
|
|
||||||
|
/* Sets whether the child event source owns its pidfd, i.e. whether the pidfd shall be closed when the
 * event source is freed. Any non-zero value of "own" turns ownership on.
 *
 * Returns 0 on success. Fails with -EINVAL if s is NULL, -EDOM if s is not a child event source, and
 * -EOPNOTSUPP if the source has no pidfd. */
_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd >= 0) {
                s->child.pidfd_owned = own != 0;
                return 0;
        }

        return -EOPNOTSUPP;
}
|
||||||
|
|
||||||
|
/* Reports whether the child event source owns the watched process, i.e. whether the process is killed and
 * reaped when the event source is freed.
 *
 * Returns 1 if owned, 0 if not. Fails with -EINVAL if s is NULL and -EDOM if s is not a child event
 * source. */
_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        int owned;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        owned = s->child.process_owned;
        return owned;
}
|
||||||
|
|
||||||
|
/* Sets whether the child event source owns the watched process, i.e. whether the process shall be killed
 * and reaped when the event source is freed. Any non-zero value of "own" turns ownership on.
 *
 * Returns 0 on success. Fails with -EINVAL if s is NULL and -EDOM if s is not a child event source. */
_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own != 0;

        return 0;
}
|
||||||
|
|
||||||
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
|
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
|
||||||
assert_return(s, -EINVAL);
|
assert_return(s, -EINVAL);
|
||||||
assert_return(mask, -EINVAL);
|
assert_return(mask, -EINVAL);
|
||||||
|
@ -2535,6 +2850,12 @@ static int process_child(sd_event *e) {
|
||||||
if (s->enabled == SD_EVENT_OFF)
|
if (s->enabled == SD_EVENT_OFF)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (s->child.exited)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
|
||||||
|
continue;
|
||||||
|
|
||||||
zero(s->child.siginfo);
|
zero(s->child.siginfo);
|
||||||
r = waitid(P_PID, s->child.pid, &s->child.siginfo,
|
r = waitid(P_PID, s->child.pid, &s->child.siginfo,
|
||||||
WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
|
WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
|
||||||
|
@ -2544,6 +2865,9 @@ static int process_child(sd_event *e) {
|
||||||
if (s->child.siginfo.si_pid != 0) {
|
if (s->child.siginfo.si_pid != 0) {
|
||||||
bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
|
bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
|
||||||
|
|
||||||
|
if (zombie)
|
||||||
|
s->child.exited = true;
|
||||||
|
|
||||||
if (!zombie && (s->child.options & WEXITED)) {
|
if (!zombie && (s->child.options & WEXITED)) {
|
||||||
/* If the child isn't dead then let's
|
/* If the child isn't dead then let's
|
||||||
* immediately remove the state change
|
* immediately remove the state change
|
||||||
|
@ -2563,6 +2887,33 @@ static int process_child(sd_event *e) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Handles an epoll wakeup on the pidfd of a child event source.
 *
 * Nothing is done (and 0 is returned) if the source is already pending, is disabled, or is not watched
 * via pidfd. Otherwise the child's state is queried with waitid() using WNOHANG|WNOWAIT, i.e. without
 * blocking and without reaping it. If a state change was reported (si_pid != 0) the source is marked
 * pending, and additionally marked as exited if the process terminated (CLD_EXITED, CLD_KILLED or
 * CLD_DUMPED).
 *
 * The revents parameter is not used. Returns a negative errno if waitid() fails, and otherwise whatever
 * source_set_pending() returns. */
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending || s->enabled == SD_EVENT_OFF || !EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        /* si_pid remains zero if there was no state change to report */
        if (s->child.siginfo.si_pid == 0)
                return 0;

        switch (s->child.siginfo.si_code) {

        case CLD_EXITED:
        case CLD_KILLED:
        case CLD_DUMPED:
                s->child.exited = true;
                break;

        default:
                break;
        }

        return source_set_pending(s, true);
}
|
||||||
|
|
||||||
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
|
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
|
||||||
bool read_one = false;
|
bool read_one = false;
|
||||||
int r;
|
int r;
|
||||||
|
@ -2847,8 +3198,10 @@ static int source_dispatch(sd_event_source *s) {
|
||||||
r = s->child.callback(s, &s->child.siginfo, s->userdata);
|
r = s->child.callback(s, &s->child.siginfo, s->userdata);
|
||||||
|
|
||||||
/* Now, reap the PID for good. */
|
/* Now, reap the PID for good. */
|
||||||
if (zombie)
|
if (zombie) {
|
||||||
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
|
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
|
||||||
|
s->child.waited = true;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -3144,12 +3497,33 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
|
||||||
|
|
||||||
switch (*t) {
|
switch (*t) {
|
||||||
|
|
||||||
case WAKEUP_EVENT_SOURCE:
|
case WAKEUP_EVENT_SOURCE: {
|
||||||
r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
|
sd_event_source *s = ev_queue[i].data.ptr;
|
||||||
|
|
||||||
|
assert(s);
|
||||||
|
|
||||||
|
switch (s->type) {
|
||||||
|
|
||||||
|
case SOURCE_IO:
|
||||||
|
r = process_io(e, s, ev_queue[i].events);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SOURCE_CHILD:
|
||||||
|
r = process_pidfd(e, s, ev_queue[i].events);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert_not_reached("Unexpected event source type");
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case WAKEUP_CLOCK_DATA: {
|
case WAKEUP_CLOCK_DATA: {
|
||||||
struct clock_data *d = ev_queue[i].data.ptr;
|
struct clock_data *d = ev_queue[i].data.ptr;
|
||||||
|
|
||||||
|
assert(d);
|
||||||
|
|
||||||
r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
|
r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <sys/inotify.h>
|
#include <sys/inotify.h>
|
||||||
#include <sys/signalfd.h>
|
#include <sys/signalfd.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include "_sd-common.h"
|
#include "_sd-common.h"
|
||||||
|
@ -89,6 +90,7 @@ int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, s
|
||||||
int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
|
int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
|
||||||
int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
|
int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
|
||||||
int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
|
int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
|
||||||
|
int sd_event_add_child_pidfd(sd_event *e, sd_event_source **s, int pidfd, int options, sd_event_child_handler_t callback, void *userdata);
|
||||||
int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
|
int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
|
||||||
int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
|
int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
|
||||||
int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
|
int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
|
||||||
|
@ -141,6 +143,16 @@ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec);
|
||||||
int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
|
int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
|
||||||
int sd_event_source_get_signal(sd_event_source *s);
|
int sd_event_source_get_signal(sd_event_source *s);
|
||||||
int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
|
int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
|
||||||
|
int sd_event_source_get_child_pidfd(sd_event_source *s);
|
||||||
|
int sd_event_source_get_child_pidfd_own(sd_event_source *s);
|
||||||
|
int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own);
|
||||||
|
int sd_event_source_get_child_process_own(sd_event_source *s);
|
||||||
|
int sd_event_source_set_child_process_own(sd_event_source *s, int own);
|
||||||
|
#if defined _GNU_SOURCE || (defined _POSIX_C_SOURCE && _POSIX_C_SOURCE >= 199309L)
|
||||||
|
int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags);
|
||||||
|
#else
|
||||||
|
int sd_event_source_send_child_signal(sd_event_source *s, int sig, const void *si, unsigned flags);
|
||||||
|
#endif
|
||||||
int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
|
int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
|
||||||
int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
|
int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
|
||||||
int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
|
int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
|
||||||
|
|
Loading…
Reference in a new issue