diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 47a3b6d3a2..bffdeb62de 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2186,40 +2186,46 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) { _cleanup_free_ char *cgroup = NULL; - int r; assert(m); - if (pid <= 0) + if (!pid_is_valid(pid)) return NULL; - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); - if (r < 0) + if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup) < 0) return NULL; return manager_get_unit_by_cgroup(m, cgroup); } Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { - Unit *u; + Unit *u, **array; assert(m); - if (pid <= 0) + /* Note that a process might be owned by multiple units, we return only one here, which is good enough for most + * cases, though not strictly correct. We prefer the one reported by cgroup membership, as that's the most + * relevant one as children of the process will be assigned to that one, too, before all else. */ + + if (!pid_is_valid(pid)) return NULL; if (pid == getpid_cached()) return hashmap_get(m->units, SPECIAL_INIT_SCOPE); - u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid)); + u = manager_get_unit_by_pid_cgroup(m, pid); if (u) return u; - u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid)); + u = hashmap_get(m->watch_pids, PID_TO_PTR(pid)); if (u) return u; - return manager_get_unit_by_pid_cgroup(m, pid); + array = hashmap_get(m->watch_pids, PID_TO_PTR(-pid)); + if (array) + return array[0]; + + return NULL; } int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { diff --git a/src/core/manager.c b/src/core/manager.c index 2dec25b0d4..4130b24972 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1205,8 +1205,7 @@ Manager* manager_free(Manager *m) { hashmap_free(m->units); hashmap_free(m->units_by_invocation_id); hashmap_free(m->jobs); - hashmap_free(m->watch_pids1); - hashmap_free(m->watch_pids2); + hashmap_free(m->watch_pids); hashmap_free(m->watch_bus); set_free(m->startup_units); @@ -1915,22 +1914,27 @@ static void manager_invoke_notify_message( const char *buf, FDSet *fds) { - _cleanup_strv_free_ char **tags = NULL; - assert(m); assert(u); assert(ucred); assert(buf); - tags = strv_split(buf, NEWLINE); - if (!tags) { - log_oom(); + if (u->notifygen == m->notifygen) /* Already invoked on this same unit in this same iteration? */ return; - } + u->notifygen = m->notifygen; + + if (UNIT_VTABLE(u)->notify_message) { + _cleanup_strv_free_ char **tags = NULL; + + tags = strv_split(buf, NEWLINE); + if (!tags) { + log_oom(); + return; + } - if (UNIT_VTABLE(u)->notify_message) UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds); - else if (DEBUG_LOGGING) { + + } else if (DEBUG_LOGGING) { _cleanup_free_ char *x = NULL, *y = NULL; x = ellipsize(buf, 20, 90); @@ -1964,9 +1968,11 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t struct cmsghdr *cmsg; struct ucred *ucred = NULL; - Unit *u1, *u2, *u3; + _cleanup_free_ Unit **array_copy = NULL; + Unit *u1, *u2, **array; int r, *fd_array = NULL; unsigned n_fds = 0; + bool found = false; ssize_t n; assert(m); @@ -2033,22 +2039,41 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t /* Make sure it's NUL-terminated. */ buf[n] = 0; - /* Notify every unit that might be interested, but try - * to avoid notifying the same one multiple times. */ + /* Increase the generation counter used for filtering out duplicate unit invocations. */ + m->notifygen++; + + /* Notify every unit that might be interested, which might be multiple. */ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); - if (u1) + u2 = hashmap_get(m->watch_pids, PID_TO_PTR(ucred->pid)); + array = hashmap_get(m->watch_pids, PID_TO_PTR(-ucred->pid)); + if (array) { + size_t k = 0; + + while (array[k]) + k++; + + array_copy = newdup(Unit*, array, k+1); + if (!array_copy) + log_oom(); + } + /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle duplicate units + * make sure we only invoke each unit's handler once. */ + if (u1) { manager_invoke_notify_message(m, u1, ucred, buf, fds); - - u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); - if (u2 && u2 != u1) + found = true; + } + if (u2) { manager_invoke_notify_message(m, u2, ucred, buf, fds); + found = true; + } + if (array_copy) + for (size_t i = 0; array_copy[i]; i++) { + manager_invoke_notify_message(m, array_copy[i], ucred, buf, fds); + found = true; + } - u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); - if (u3 && u3 != u2 && u3 != u1) - manager_invoke_notify_message(m, u3, ucred, buf, fds); - - if (!u1 && !u2 && !u3) - log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); + if (!found) + log_warning("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid); if (fdset_size(fds) > 0) log_warning("Got extra auxiliary fds with notification message, closing them."); @@ -2056,29 +2081,25 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } -static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) { - uint64_t iteration; +static void manager_invoke_sigchld_event( + Manager *m, + Unit *u, + const siginfo_t *si) { assert(m); assert(u); assert(si); - sd_event_get_iteration(m->event, &iteration); - - log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id); + /* Already invoked the handler of this unit in this iteration? Then don't process this again */ + if (u->sigchldgen == m->sigchldgen) + return; + u->sigchldgen = m->sigchldgen; + log_unit_debug(u, "Child "PID_FMT" belongs to %s.", si->si_pid, u->id); unit_unwatch_pid(u, si->si_pid); - if (UNIT_VTABLE(u)->sigchld_event) { - if (set_size(u->pids) <= 1 || - iteration != u->sigchldgen || - unit_main_pid(u) == si->si_pid || - unit_control_pid(u) == si->si_pid) { - UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status); - u->sigchldgen = iteration; - } else - log_debug("%s already issued a sigchld this iteration %" PRIu64 ", skipping. Pids still being watched %d", u->id, iteration, set_size(u->pids)); - } + if (UNIT_VTABLE(u)->sigchld_event) + UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status); } static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) { @@ -2105,8 +2126,9 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) { goto turn_off; if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) { + _cleanup_free_ Unit **array_copy = NULL; _cleanup_free_ char *name = NULL; - Unit *u1, *u2, *u3; + Unit *u1, *u2, **array; (void) get_process_comm(si.si_pid, &name); @@ -2118,17 +2140,36 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) { ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL) : signal_to_string(si.si_status))); - /* And now figure out the unit this belongs - * to, it might be multiple... */ + /* Increase the generation counter used for filtering out duplicate unit invocations */ + m->sigchldgen++; + + /* And now figure out the unit this belongs to, it might be multiple... */ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); + u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid)); + array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid)); + if (array) { + size_t n = 0; + + /* Cound how many entries the array has */ + while (array[n]) + n++; + + /* Make a copy of the array so that we don't trip up on the array changing beneath us */ + array_copy = newdup(Unit*, array, n+1); + if (!array_copy) + log_oom(); + } + + /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but + * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for + * each iteration. */ if (u1) - invoke_sigchld_event(m, u1, &si); - u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); - if (u2 && u2 != u1) - invoke_sigchld_event(m, u2, &si); - u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid)); - if (u3 && u3 != u2 && u3 != u1) - invoke_sigchld_event(m, u3, &si); + manager_invoke_sigchld_event(m, u1, &si); + if (u2) + manager_invoke_sigchld_event(m, u2, &si); + if (array_copy) + for (size_t i = 0; array_copy[i]; i++) + manager_invoke_sigchld_event(m, array_copy[i], &si); } /* And now, we actually reap the zombie. */ diff --git a/src/core/manager.h b/src/core/manager.h index 3af780f866..90d5258b53 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -145,14 +145,14 @@ struct Manager { sd_event *event; - /* We use two hash tables here, since the same PID might be - * watched by two different units: once the unit that forked - * it off, and possibly a different unit to which it was - * joined as cgroup member. Since we know that it is either - * one or two units for each PID we just use to hashmaps - * here. */ - Hashmap *watch_pids1; /* pid => Unit object n:1 */ - Hashmap *watch_pids2; /* pid => Unit object n:1 */ + /* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in + * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will + * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is + * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll + * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and + * negative PIDs are not used for regular processes but process groups, which we don't care about in this + * context, but this allows us to use the negative range for our own purposes. */ + Hashmap *watch_pids; /* pid => unit as well as -pid => array of units */ /* A set contains all units which cgroup should be refreshed after startup */ Set *startup_units; @@ -350,8 +350,13 @@ struct Manager { int first_boot; /* tri-state */ - /* prefixes of e.g. RuntimeDirectory= */ + /* Prefixes of e.g. RuntimeDirectory= */ char *prefix[_EXEC_DIRECTORY_TYPE_MAX]; + + /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event + * multiple times on the same unit. */ + unsigned sigchldgen; + unsigned notifygen; }; #define MANAGER_IS_SYSTEM(m) ((m)->unit_file_scope == UNIT_FILE_SYSTEM) diff --git a/src/core/unit.c b/src/core/unit.c index 308fe3f3e8..97585a7179 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2534,44 +2534,97 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su } int unit_watch_pid(Unit *u, pid_t pid) { - int q, r; + int r; assert(u); - assert(pid >= 1); + assert(pid_is_valid(pid)); - /* Watch a specific PID. We only support one or two units - * watching each PID for now, not more. */ + /* Watch a specific PID */ r = set_ensure_allocated(&u->pids, NULL); if (r < 0) return r; - r = hashmap_ensure_allocated(&u->manager->watch_pids1, NULL); + r = hashmap_ensure_allocated(&u->manager->watch_pids, NULL); if (r < 0) return r; - r = hashmap_put(u->manager->watch_pids1, PID_TO_PTR(pid), u); - if (r == -EEXIST) { - r = hashmap_ensure_allocated(&u->manager->watch_pids2, NULL); - if (r < 0) - return r; + /* First try, let's add the unit keyed by "pid". */ + r = hashmap_put(u->manager->watch_pids, PID_TO_PTR(pid), u); + if (r == -EEXIST) { + Unit **array; + bool found = false; + size_t n = 0; - r = hashmap_put(u->manager->watch_pids2, PID_TO_PTR(pid), u); - } + /* OK, the "pid" key is already assigned to a different unit. Let's see if the "-pid" key (which points + * to an array of Units rather than just a Unit), lists us already. */ - q = set_put(u->pids, PID_TO_PTR(pid)); - if (q < 0) - return q; + array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid)); + if (array) + for (; array[n]; n++) + if (array[n] == u) + found = true; - return r; + if (found) /* Found it already? if so, do nothing */ + r = 0; + else { + Unit **new_array; + + /* Allocate a new array */ + new_array = new(Unit*, n + 2); + if (!new_array) + return -ENOMEM; + + memcpy_safe(new_array, array, sizeof(Unit*) * n); + new_array[n] = u; + new_array[n+1] = NULL; + + /* Add or replace the old array */ + r = hashmap_replace(u->manager->watch_pids, PID_TO_PTR(-pid), new_array); + if (r < 0) { + free(new_array); + return r; + } + + free(array); + } + } else if (r < 0) + return r; + + r = set_put(u->pids, PID_TO_PTR(pid)); + if (r < 0) + return r; + + return 0; } void unit_unwatch_pid(Unit *u, pid_t pid) { - assert(u); - assert(pid >= 1); + Unit **array; + + assert(u); + assert(pid_is_valid(pid)); + + /* First let's drop the unit in case it's keyed as "pid". */ + (void) hashmap_remove_value(u->manager->watch_pids, PID_TO_PTR(pid), u); + + /* Then, let's also drop the unit, in case it's in the array keyed by -pid */ + array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid)); + if (array) { + size_t n, m = 0; + + /* Let's iterate through the array, dropping our own entry */ + for (n = 0; array[n]; n++) + if (array[n] != u) + array[m++] = array[n]; + array[m] = NULL; + + if (m == 0) { + /* The array is now empty, remove the entire entry */ + assert(hashmap_remove(u->manager->watch_pids, PID_TO_PTR(-pid)) == array); + free(array); + } + } - (void) hashmap_remove_value(u->manager->watch_pids1, PID_TO_PTR(pid), u); - (void) hashmap_remove_value(u->manager->watch_pids2, PID_TO_PTR(pid), u); (void) set_remove(u->pids, PID_TO_PTR(pid)); } diff --git a/src/core/unit.h b/src/core/unit.h index f3e991e79e..4cd0706676 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -236,8 +236,10 @@ struct Unit { * process SIGCHLD for */ Set *pids; - /* Used in sigchld event invocation to avoid repeat events being invoked */ - uint64_t sigchldgen; + /* Used in SIGCHLD and sd_notify() message event invocation logic to avoid that we dispatch the same event + * multiple times on the same unit. */ + unsigned sigchldgen; + unsigned notifygen; /* Used during GC sweeps */ unsigned gc_marker; diff --git a/src/test/meson.build b/src/test/meson.build index 18e957ddc8..9242e492f9 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -377,6 +377,17 @@ tests += [ libselinux, libblkid]], + [['src/test/test-watch-pid.c', + 'src/test/test-helper.c'], + [libcore, + libshared], + [libmount, + threads, + librt, + libseccomp, + libselinux, + libblkid]], + [['src/test/test-hashmap.c', 'src/test/test-hashmap-plain.c', test_hashmap_ordered_c], diff --git a/src/test/test-watch-pid.c b/src/test/test-watch-pid.c new file mode 100644 index 0000000000..ed6c3d05cc --- /dev/null +++ b/src/test/test-watch-pid.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "log.h" +#include "manager.h" +#include "rm-rf.h" +#include "test-helper.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL; + Unit *a, *b, *c, *u; + Manager *m; + int r; + + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + + if (getuid() != 0) { + log_notice("Not running as root, skipping kernel related tests."); + return EXIT_TEST_SKIP; + } + + r = enter_cgroup_subroot(); + if (r == -ENOMEDIUM) { + log_notice("cgroupfs not available, skipping tests"); + return EXIT_TEST_SKIP; + } + + assert_se(set_unit_path(get_testdata_dir("")) >= 0); + assert_se(runtime_dir = setup_fake_runtime_dir()); + + assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0); + assert_se(manager_startup(m, NULL, NULL) >= 0); + + assert_se(a = unit_new(m, sizeof(Service))); + assert_se(unit_add_name(a, "a.service") >= 0); + assert_se(set_isempty(a->pids)); + + assert_se(b = unit_new(m, sizeof(Service))); + assert_se(unit_add_name(b, "b.service") >= 0); + assert_se(set_isempty(b->pids)); + + assert_se(c = unit_new(m, sizeof(Service))); + assert_se(unit_add_name(c, "c.service") >= 0); + assert_se(set_isempty(c->pids)); + + assert_se(hashmap_isempty(m->watch_pids)); + assert_se(manager_get_unit_by_pid(m, 4711) == NULL); + + assert_se(unit_watch_pid(a, 4711) >= 0); + assert_se(manager_get_unit_by_pid(m, 4711) == a); + + assert_se(unit_watch_pid(a, 4711) >= 0); + assert_se(manager_get_unit_by_pid(m, 4711) == a); + + assert_se(unit_watch_pid(b, 4711) >= 0); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == b); + + assert_se(unit_watch_pid(b, 4711) >= 0); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == b); + + assert_se(unit_watch_pid(c, 4711) >= 0); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == b || u == c); + + assert_se(unit_watch_pid(c, 4711) >= 0); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == b || u == c); + + unit_unwatch_pid(b, 4711); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == c); + + unit_unwatch_pid(b, 4711); + u = manager_get_unit_by_pid(m, 4711); + assert_se(u == a || u == c); + + unit_unwatch_pid(a, 4711); + assert_se(manager_get_unit_by_pid(m, 4711) == c); + + unit_unwatch_pid(a, 4711); + assert_se(manager_get_unit_by_pid(m, 4711) == c); + + unit_unwatch_pid(c, 4711); + assert_se(manager_get_unit_by_pid(m, 4711) == NULL); + + unit_unwatch_pid(c, 4711); + assert_se(manager_get_unit_by_pid(m, 4711) == NULL); + + manager_free(m); + + return 0; +}