manager: Only invoke a single sigchld per unit within a cleanup cycle

By default, each iteration of manager_dispatch_sigchld() results in a unit level
sigchld event being invoked. For scope units, this results in a scope_sigchld_event()
which can seemingly stall for workloads that have a large number of PIDs within the
scope. The stall exhibits itself as a SIG_0 being initiated for each u->pids entry
as a result of pid_is_unwaited().

v2:
This patch resolves this condition by only paying the cost of a sigchld in the underlying
scope unit once per sigchld iteration. A new "sigchldgen" member resides within the
Unit struct. The Manager's event loop iteration counter is incremented by the sd event loop
and read via sd_event_get_iteration(); the Unit member is set to that value each
time that a sigchld event is invoked. If the Manager iteration value and Unit member
match, the sigchld event is not invoked again for that iteration, unless the unit has at
most one watched PID left, in which case the event is always invoked.
This commit is contained in:
Kyle Walker 2016-06-30 15:12:18 -04:00
parent 7486322b99
commit 36f20ae3b2
3 changed files with 15 additions and 2 deletions

View File

@ -1716,16 +1716,25 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
}
/* Forwards the exit of child process si->si_pid to the unit u that was watching it.
 *
 * The PID is first removed from the unit's watch set. The unit type's sigchld_event
 * handler (if it has one) is then invoked at most once per event loop iteration for
 * this unit: u->sigchldgen records the iteration in which the handler last ran, and
 * further children reaped for the same unit during that iteration are only logged.
 * The handler is always invoked when at most one watched PID remains, or when the
 * current iteration counter cannot be determined. */
static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) {
        uint64_t iteration = 0;
        int r;

        assert(m);
        assert(u);
        assert(si);

        /* r < 0 means the counter could not be read; in that case never skip the handler. */
        r = sd_event_get_iteration(m->event, &iteration);

        log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id);

        unit_unwatch_pid(u, si->si_pid);

        if (!UNIT_VTABLE(u)->sigchld_event)
                return;

        if (r < 0 || set_size(u->pids) <= 1 || iteration != u->sigchldgen) {
                UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);

                /* Only remember an iteration value that was actually read. */
                if (r >= 0)
                        u->sigchldgen = iteration;
        } else
                /* uint64_t is not necessarily unsigned long long, hence the cast for %llu. */
                log_debug("%s already issued a sigchld this iteration %llu, skipping. Pids still being watched %d", u->id, (unsigned long long) iteration, set_size(u->pids));
}
static int manager_dispatch_sigchld(Manager *m) {

View File

@ -100,6 +100,7 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
u->sigchldgen = 0;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);

View File

@ -162,6 +162,9 @@ struct Unit {
* process SIGCHLD for */
Set *pids;
/* Used in sigchld event invocation to avoid repeat events being invoked */
uint64_t sigchldgen;
/* Used during GC sweeps */
unsigned gc_marker;