From 575b300b795b6765d66e0f351d80635c7d3addea Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 23 Jan 2018 18:18:13 +0100 Subject: [PATCH] pid1: rework how we dispatch SIGCHLD and other signals This fundamentally makes one change: we never process more than one signal or more than one waitid() event per event loop. We'll never tight loop around waitid() or around read() on our signalfd instead, but always return to the main event loop after processing one event. By doing this we put the event priorization handling into full power again, as we'll always check for higher priority events before looking at the next signal or waitid() again. This introduces a new "defer" event source "sigchld_event". It's enabled as soon as we see SIGCHLD, and disabled as soon as waitid() reported no further children pending. It's running at a relatively high priority, one step higher than signal handling itself, but lower than /proc/self/mountinfo event handling, so that the latter always takes precedence. Since we want to process sd_notify() events at an even higher priority than SIGCHLD (as before) it is moved one priority step up, too. Fixes: #7932 Possibly fixes: #7966 --- src/core/manager.c | 514 ++++++++++++++++++++++++--------------------- src/core/manager.h | 2 + 2 files changed, 274 insertions(+), 242 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index e8140464eb..2dec25b0d4 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -109,6 +109,7 @@ static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32 static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); +static int manager_dispatch_sigchld(sd_event_source *source, void *userdata); static int manager_run_environment_generators(Manager *m); static int manager_run_generators(Manager *m); @@ -634,6 +635,29 @@ static int manager_setup_run_queue(Manager *m) { return 0; } +static int manager_setup_sigchld_event_source(Manager *m) { + int r; + + assert(m); + assert(!m->sigchld_event_source); + + r = sd_event_add_defer(m->event, &m->sigchld_event_source, manager_dispatch_sigchld, m); + if (r < 0) + return r; + + r = sd_event_source_set_priority(m->sigchld_event_source, SD_EVENT_PRIORITY_NORMAL-7); + if (r < 0) + return r; + + r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF); + if (r < 0) + return r; + + (void) sd_event_source_set_description(m->sigchld_event_source, "manager-sigchld"); + + return 0; +} + int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { Manager *m; int r; @@ -734,6 +758,10 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { if (r < 0) goto fail; + r = manager_setup_sigchld_event_source(m); + if (r < 0) + goto fail; + m->udev = udev_new(); if (!m->udev) { r = -ENOMEM; @@ -817,7 +845,7 @@ static int manager_setup_notify(Manager *m) { /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which * service an exit message belongs. */ - r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-7); + r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-8); if (r < 0) return log_error_errno(r, "Failed to set priority of notify event source: %m"); @@ -1185,6 +1213,7 @@ Manager* manager_free(Manager *m) { set_free(m->failed_units); sd_event_source_unref(m->signal_event_source); + sd_event_source_unref(m->sigchld_event_source); sd_event_source_unref(m->notify_event_source); sd_event_source_unref(m->cgroups_agent_event_source); sd_event_source_unref(m->time_change_event_source); @@ -2052,65 +2081,71 @@ static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) { } } -static int manager_dispatch_sigchld(Manager *m) { +static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) { + Manager *m = userdata; + siginfo_t si = {}; + int r; + + assert(source); assert(m); - for (;;) { - siginfo_t si = {}; + /* First we call waitd() for a PID and do not reap the zombie. That way we can still access /proc/$PID for it + * while it is a zombie. */ - /* First we call waitd() for a PID and do not reap the - * zombie. That way we can still access /proc/$PID for - * it while it is a zombie. */ - if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) { + if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) { - if (errno == ECHILD) - break; + if (errno == ECHILD) + goto turn_off; - if (errno == EINTR) - continue; - - return -errno; - } - - if (si.si_pid <= 0) - break; - - if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) { - _cleanup_free_ char *name = NULL; - Unit *u1, *u2, *u3; - - get_process_comm(si.si_pid, &name); - - log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)", - si.si_pid, strna(name), - sigchld_code_to_string(si.si_code), - si.si_status, - strna(si.si_code == CLD_EXITED - ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL) - : signal_to_string(si.si_status))); - - /* And now figure out the unit this belongs - * to, it might be multiple... */ - u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); - if (u1) - invoke_sigchld_event(m, u1, &si); - u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); - if (u2 && u2 != u1) - invoke_sigchld_event(m, u2, &si); - u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid)); - if (u3 && u3 != u2 && u3 != u1) - invoke_sigchld_event(m, u3, &si); - } - - /* And now, we actually reap the zombie. */ - if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) { - if (errno == EINTR) - continue; - - return -errno; - } + log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m"); + return 0; } + if (si.si_pid <= 0) + goto turn_off; + + if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) { + _cleanup_free_ char *name = NULL; + Unit *u1, *u2, *u3; + + (void) get_process_comm(si.si_pid, &name); + + log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)", + si.si_pid, strna(name), + sigchld_code_to_string(si.si_code), + si.si_status, + strna(si.si_code == CLD_EXITED + ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL) + : signal_to_string(si.si_status))); + + /* And now figure out the unit this belongs + * to, it might be multiple... */ + u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); + if (u1) + invoke_sigchld_event(m, u1, &si); + u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); + if (u2 && u2 != u1) + invoke_sigchld_event(m, u2, &si); + u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid)); + if (u3 && u3 != u2 && u3 != u1) + invoke_sigchld_event(m, u3, &si); + } + + /* And now, we actually reap the zombie. */ + if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) { + log_error_errno(errno, "Failed to dequeue child, ignoring: %m"); + return 0; + } + + return 0; + +turn_off: + /* All children processed for now, turn off event source */ + + r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF); + if (r < 0) + return log_error_errno(r, "Failed to disable SIGCHLD event source: %m"); + return 0; } @@ -2141,7 +2176,6 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t Manager *m = userdata; ssize_t n; struct signalfd_siginfo sfsi; - bool sigchld = false; int r; assert(m); @@ -2152,195 +2186,192 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t return 0; } - for (;;) { - n = read(m->signal_fd, &sfsi, sizeof(sfsi)); - if (n != sizeof(sfsi)) { - if (n >= 0) { - log_warning("Truncated read from signal fd (%zu bytes)!", n); + n = read(m->signal_fd, &sfsi, sizeof(sfsi)); + if (n != sizeof(sfsi)) { + if (n >= 0) { + log_warning("Truncated read from signal fd (%zu bytes), ignoring!", n); + return 0; + } + + if (IN_SET(errno, EINTR, EAGAIN)) + return 0; + + /* We return an error here, which will kill this handler, + * to avoid a busy loop on read error. */ + return log_error_errno(errno, "Reading from signal fd failed: %m"); + } + + log_received_signal(sfsi.ssi_signo == SIGCHLD || + (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m)) + ? LOG_DEBUG : LOG_INFO, + &sfsi); + + switch (sfsi.ssi_signo) { + + case SIGCHLD: + r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON); + if (r < 0) + log_warning_errno(r, "Failed to enable SIGCHLD even source, ignoring: %m"); + + break; + + case SIGTERM: + if (MANAGER_IS_SYSTEM(m)) { + /* This is for compatibility with the + * original sysvinit */ + r = verify_run_space_and_log("Refusing to reexecute"); + if (r >= 0) + m->exit_code = MANAGER_REEXECUTE; + break; + } + + _fallthrough_; + case SIGINT: + if (MANAGER_IS_SYSTEM(m)) + manager_handle_ctrl_alt_del(m); + else + manager_start_target(m, SPECIAL_EXIT_TARGET, + JOB_REPLACE_IRREVERSIBLY); + break; + + case SIGWINCH: + if (MANAGER_IS_SYSTEM(m)) + manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE); + + /* This is a nop on non-init */ + break; + + case SIGPWR: + if (MANAGER_IS_SYSTEM(m)) + manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE); + + /* This is a nop on non-init */ + break; + + case SIGUSR1: { + Unit *u; + + u = manager_get_unit(m, SPECIAL_DBUS_SERVICE); + + if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) { + log_info("Trying to reconnect to bus..."); + bus_init(m, true); + } + + if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) { + log_info("Loading D-Bus service..."); + manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE); + } + + break; + } + + case SIGUSR2: { + _cleanup_free_ char *dump = NULL; + + r = manager_get_dump_string(m, &dump); + if (r < 0) { + log_warning_errno(errno, "Failed to acquire manager dump: %m"); + break; + } + + log_dump(LOG_INFO, dump); + break; + } + + case SIGHUP: + r = verify_run_space_and_log("Refusing to reload"); + if (r >= 0) + m->exit_code = MANAGER_RELOAD; + break; + + default: { + + /* Starting SIGRTMIN+0 */ + static const struct { + const char *target; + JobMode mode; + } target_table[] = { + [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE }, + [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE }, + [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE }, + [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY }, + [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY }, + [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY }, + [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY }, + }; + + /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */ + static const ManagerExitCode code_table[] = { + [0] = MANAGER_HALT, + [1] = MANAGER_POWEROFF, + [2] = MANAGER_REBOOT, + [3] = MANAGER_KEXEC, + }; + + if ((int) sfsi.ssi_signo >= SIGRTMIN+0 && + (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) { + int idx = (int) sfsi.ssi_signo - SIGRTMIN; + manager_start_target(m, target_table[idx].target, + target_table[idx].mode); + break; + } + + if ((int) sfsi.ssi_signo >= SIGRTMIN+13 && + (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) { + m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13]; + break; + } + + switch (sfsi.ssi_signo - SIGRTMIN) { + + case 20: + manager_set_show_status(m, SHOW_STATUS_YES); + break; + + case 21: + manager_set_show_status(m, SHOW_STATUS_NO); + break; + + case 22: + log_set_max_level(LOG_DEBUG); + log_info("Setting log level to debug."); + break; + + case 23: + log_set_max_level(LOG_INFO); + log_info("Setting log level to info."); + break; + + case 24: + if (MANAGER_IS_USER(m)) { + m->exit_code = MANAGER_EXIT; return 0; } - if (IN_SET(errno, EINTR, EAGAIN)) - break; + /* This is a nop on init */ + break; - /* We return an error here, which will kill this handler, - * to avoid a busy loop on read error. */ - return log_error_errno(errno, "Reading from signal fd failed: %m"); + case 26: + case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */ + log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); + log_notice("Setting log target to journal-or-kmsg."); + break; + + case 27: + log_set_target(LOG_TARGET_CONSOLE); + log_notice("Setting log target to console."); + break; + + case 28: + log_set_target(LOG_TARGET_KMSG); + log_notice("Setting log target to kmsg."); + break; + + default: + log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo)); } - - log_received_signal(sfsi.ssi_signo == SIGCHLD || - (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m)) - ? LOG_DEBUG : LOG_INFO, - &sfsi); - - switch (sfsi.ssi_signo) { - - case SIGCHLD: - sigchld = true; - break; - - case SIGTERM: - if (MANAGER_IS_SYSTEM(m)) { - /* This is for compatibility with the - * original sysvinit */ - r = verify_run_space_and_log("Refusing to reexecute"); - if (r >= 0) - m->exit_code = MANAGER_REEXECUTE; - break; - } - - _fallthrough_; - case SIGINT: - if (MANAGER_IS_SYSTEM(m)) - manager_handle_ctrl_alt_del(m); - else - manager_start_target(m, SPECIAL_EXIT_TARGET, - JOB_REPLACE_IRREVERSIBLY); - break; - - case SIGWINCH: - if (MANAGER_IS_SYSTEM(m)) - manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE); - - /* This is a nop on non-init */ - break; - - case SIGPWR: - if (MANAGER_IS_SYSTEM(m)) - manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE); - - /* This is a nop on non-init */ - break; - - case SIGUSR1: { - Unit *u; - - u = manager_get_unit(m, SPECIAL_DBUS_SERVICE); - - if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) { - log_info("Trying to reconnect to bus..."); - bus_init(m, true); - } - - if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) { - log_info("Loading D-Bus service..."); - manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE); - } - - break; - } - - case SIGUSR2: { - _cleanup_free_ char *dump = NULL; - - r = manager_get_dump_string(m, &dump); - if (r < 0) { - log_warning_errno(errno, "Failed to acquire manager dump: %m"); - break; - } - - log_dump(LOG_INFO, dump); - break; - } - - case SIGHUP: - r = verify_run_space_and_log("Refusing to reload"); - if (r >= 0) - m->exit_code = MANAGER_RELOAD; - break; - - default: { - - /* Starting SIGRTMIN+0 */ - static const struct { - const char *target; - JobMode mode; - } target_table[] = { - [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE }, - [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE }, - [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE }, - [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY }, - [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY }, - [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY }, - [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY } - }; - - /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */ - static const ManagerExitCode code_table[] = { - [0] = MANAGER_HALT, - [1] = MANAGER_POWEROFF, - [2] = MANAGER_REBOOT, - [3] = MANAGER_KEXEC - }; - - if ((int) sfsi.ssi_signo >= SIGRTMIN+0 && - (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) { - int idx = (int) sfsi.ssi_signo - SIGRTMIN; - manager_start_target(m, target_table[idx].target, - target_table[idx].mode); - break; - } - - if ((int) sfsi.ssi_signo >= SIGRTMIN+13 && - (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) { - m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13]; - break; - } - - switch (sfsi.ssi_signo - SIGRTMIN) { - - case 20: - manager_set_show_status(m, SHOW_STATUS_YES); - break; - - case 21: - manager_set_show_status(m, SHOW_STATUS_NO); - break; - - case 22: - log_set_max_level(LOG_DEBUG); - log_info("Setting log level to debug."); - break; - - case 23: - log_set_max_level(LOG_INFO); - log_info("Setting log level to info."); - break; - - case 24: - if (MANAGER_IS_USER(m)) { - m->exit_code = MANAGER_EXIT; - return 0; - } - - /* This is a nop on init */ - break; - - case 26: - case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */ - log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); - log_notice("Setting log target to journal-or-kmsg."); - break; - - case 27: - log_set_target(LOG_TARGET_CONSOLE); - log_notice("Setting log target to console."); - break; - - case 28: - log_set_target(LOG_TARGET_KMSG); - log_notice("Setting log target to kmsg."); - break; - - default: - log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo)); - } - } - } - } - - if (sigchld) - manager_dispatch_sigchld(m); + }} return 0; } @@ -2415,11 +2446,10 @@ int manager_loop(Manager *m) { manager_check_finished(m); - /* There might still be some zombies hanging around from - * before we were exec()'ed. Let's reap them. */ - r = manager_dispatch_sigchld(m); + /* There might still be some zombies hanging around from before we were exec()'ed. Let's reap them. */ + r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON); if (r < 0) - return r; + return log_error_errno(r, "Failed to enable SIGCHLD event source: %m"); while (m->exit_code == MANAGER_OK) { usec_t wait_usec; diff --git a/src/core/manager.h b/src/core/manager.h index 216ecea6fd..3af780f866 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -172,6 +172,8 @@ struct Manager { int signal_fd; sd_event_source *signal_event_source; + sd_event_source *sigchld_event_source; + int time_change_fd; sd_event_source *time_change_event_source;