Merge pull request #8171 from poettering/sd-bus-queue-limit

try not to overload pid1's bus message write queue
This commit is contained in:
Lennart Poettering 2018-02-28 18:15:40 +01:00 committed by GitHub
commit 649a5ffba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 237 additions and 9 deletions

View File

@ -186,6 +186,7 @@ manpages = [
['SD_BUS_ERROR_END', 'SD_BUS_ERROR_MAP', 'sd_bus_error_map'],
''],
['sd_bus_get_fd', '3', [], ''],
['sd_bus_get_n_queued_read', '3', ['sd_bus_get_n_queued_write'], ''],
['sd_bus_is_open', '3', ['sd_bus_is_ready'], ''],
['sd_bus_message_append', '3', ['sd_bus_message_appendv'], ''],
['sd_bus_message_append_array',

View File

@ -0,0 +1,129 @@
<?xml version='1.0'?> <!--*- Mode: nxml; nxml-child-indent: 2; indent-tabs-mode: nil -*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!--
SPDX-License-Identifier: LGPL-2.1+
This file is part of systemd.
Copyright 2018 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
-->
<refentry id="sd_bus_get_n_queued_read">
<refentryinfo>
<title>sd_bus_get_fd</title>
<productname>systemd</productname>
<authorgroup>
<author>
<contrib>Developer</contrib>
<firstname>Lennart</firstname>
<surname>Poettering</surname>
<email>lennart@poettering.net</email>
</author>
</authorgroup>
</refentryinfo>
<refmeta>
<refentrytitle>sd_bus_get_n_queued_read</refentrytitle>
<manvolnum>3</manvolnum>
</refmeta>
<refnamediv>
<refname>sd_bus_get_n_queued_read</refname>
<refname>sd_bus_get_n_queued_write</refname>
<refpurpose>Get the number of pending bus messages in the read and write queues of a bus connection object</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include &lt;systemd/sd-bus.h&gt;</funcsynopsisinfo>
<funcprototype>
<funcdef>int <function>sd_bus_get_n_queued_read</function></funcdef>
<paramdef>sd_bus *<parameter>bus</parameter></paramdef>
<paramdef>uint64_t *<parameter>ret</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>int <function>sd_bus_get_n_queued_write</function></funcdef>
<paramdef>sd_bus *<parameter>bus</parameter></paramdef>
<paramdef>uint64_t *<parameter>ret</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<function>sd_bus_get_n_queued_read()</function> may be used to query the number of bus messages in the read queue
of a bus connection object. The read queue contains all messages read from the transport medium (e.g. network
socket) but not yet processed locally. The function expects two arguments: the bus object to query the number of
queued messages of, and a pointer to a 64bit counter variable to write the current queue size to. Use
<function>sd_bus_process()</function> in order to process queued messages, i.e. to reduce the size of the read
queue (as well as, in fact, the write queue, see below) when it is non-zero.
</para>
<para>
Similar, <function>sd_bus_get_n_queued_write()</function> may be used to query the number of currently pending
bus messages in the write queue of a bus connection object. The write queue contains all messages enqueued into
the connection with a call such as <function>sd_bus_send()</function> but not yet written to the transport
medium. The expected arguments are similar to the ones of <function>sd_bus_get_n_queued_read()</function>. Here
too, use <function>sd_bus_process()</function> to reduce the size of the write queue. Alternatively, use
<function>sd_bus_flush()</function> to synchronously write out any pending bus messages until the write queue is
empty.
</para>
</refsect1>
<refsect1>
<title>Return Value</title>
<para>On success, these functions return 0 or a positive integer. On failure, they return a negative errno-style
error code.</para>
</refsect1>
<refsect1>
<title>Errors</title>
<para>Returned errors may indicate the following problems:</para>
<variablelist>
<varlistentry>
<term><constant>-ECHILD</constant></term>
<listitem><para>The bus connection has been created in a different process.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd-bus</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_bus_process</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_bus_send</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_bus_flush</refentrytitle><manvolnum>3</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View File

@ -1197,6 +1197,11 @@ int bus_foreach_bus(
/* Send to all direct buses, unconditionally */
SET_FOREACH(b, m->private_buses, i) {
/* Don't bother with enqueing these messages to clients that haven't started yet */
if (sd_bus_is_ready(b) <= 0)
continue;
r = send_message(b, userdata);
if (r < 0)
ret = r;
@ -1274,3 +1279,34 @@ int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_erro
int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.set-environment", NULL, false, UID_INVALID, &m->polkit_registry, error);
}
uint64_t manager_bus_n_queued_write(Manager *m) {
uint64_t c = 0;
Iterator i;
sd_bus *b;
int r;
/* Returns the total number of messages queued for writing on all our direct and API busses. */
SET_FOREACH(b, m->private_buses, i) {
uint64_t k;
r = sd_bus_get_n_queued_write(b, &k);
if (r < 0)
log_debug_errno(r, "Failed to query queued messages for private bus: %m");
else
c += k;
}
if (m->api_bus) {
uint64_t k;
r = sd_bus_get_n_queued_write(m->api_bus, &k);
if (r < 0)
log_debug_errno(r, "Failed to query queued messages for API bus: %m");
else
c += k;
}
return c;
}

View File

@ -48,3 +48,5 @@ int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_erro
int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
int bus_forward_agent_released(Manager *m, const char *path);
uint64_t manager_bus_n_queued_write(Manager *m);

View File

@ -102,6 +102,13 @@
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
/* If there are more than 1K bus messages queue across our API and direct busses, then let's not add more on top until
* the queue gets more empty. */
#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
/* How many units and jobs to process of the bus queue before returning to the event loop. */
#define MANAGER_BUS_MESSAGE_BUDGET 100U
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@ -1886,41 +1893,65 @@ static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
}
static unsigned manager_dispatch_dbus_queue(Manager *m) {
Job *j;
unsigned n = 0, budget;
Unit *u;
unsigned n = 0;
Job *j;
assert(m);
if (m->dispatching_dbus_queue)
return 0;
/* Anything to do at all? */
if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
return 0;
/* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this
* cycle out, and process things in a later cycle when the queues got a bit emptier. */
if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
return 0;
/* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
* full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
* CPU time with generating these messages. Note the difference in counting of this "budget" and the
* "threshold" above: the "budget" is decreased only once per generated message, regardless how many
* busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
* message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
* times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
* it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
* a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
* the "threshold" is currently chosen much higher than the "budget". */
budget = MANAGER_BUS_MESSAGE_BUDGET;
m->dispatching_dbus_queue = true;
while ((u = m->dbus_unit_queue)) {
while (budget > 0 && (u = m->dbus_unit_queue)) {
assert(u->in_dbus_queue);
bus_unit_send_change_signal(u);
n++;
n++, budget--;
}
while ((j = m->dbus_job_queue)) {
while (budget > 0 && (j = m->dbus_job_queue)) {
assert(j->in_dbus_queue);
bus_job_send_change_signal(j);
n++;
n++, budget--;
}
m->dispatching_dbus_queue = false;
if (m->send_reloading_done) {
if (budget > 0 && m->send_reloading_done) {
m->send_reloading_done = false;
bus_manager_send_reloading(m, false);
n++, budget--;
}
if (m->queued_message)
if (budget > 0 && m->queued_message) {
bus_send_queued_message(m);
n++;
}
return n;
}

View File

@ -549,3 +549,9 @@ global:
sd_event_source_get_io_fd_own;
sd_event_source_set_io_fd_own;
} LIBSYSTEMD_236;
LIBSYSTEMD_238 {
global:
sd_bus_get_n_queued_read;
sd_bus_get_n_queued_write;
} LIBSYSTEMD_237;

View File

@ -4091,3 +4091,23 @@ _public_ int sd_bus_get_sender(sd_bus *bus, const char **ret) {
*ret = bus->patch_sender;
return 0;
}
_public_ int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret) {
assert_return(bus, -EINVAL);
assert_return(bus = bus_resolve(bus), -ENOPKG);
assert_return(!bus_pid_changed(bus), -ECHILD);
assert_return(ret, -EINVAL);
*ret = bus->rqueue_size;
return 0;
}
_public_ int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret) {
assert_return(bus, -EINVAL);
assert_return(bus = bus_resolve(bus), -ENOPKG);
assert_return(!bus_pid_changed(bus), -ECHILD);
assert_return(ret, -EINVAL);
*ret = bus->wqueue_size;
return 0;
}

View File

@ -202,6 +202,9 @@ int sd_bus_attach_event(sd_bus *bus, sd_event *e, int priority);
int sd_bus_detach_event(sd_bus *bus);
sd_event *sd_bus_get_event(sd_bus *bus);
int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret);
int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret);
int sd_bus_add_filter(sd_bus *bus, sd_bus_slot **slot, sd_bus_message_handler_t callback, void *userdata);
int sd_bus_add_match(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, void *userdata);
int sd_bus_add_match_async(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);