readahead: add interface to sd-daemon.[ch] to control readahead

This commit is contained in:
Lennart Poettering 2010-09-26 15:50:14 +02:00
parent f0cf061eda
commit 6624768c9c
11 changed files with 405 additions and 18 deletions

View File

@ -421,6 +421,7 @@ MANPAGES = \
man/systemd-cgls.1 \
man/systemd-notify.1 \
man/sd_notify.3 \
man/sd_readahead.3 \
man/sd_booted.3 \
man/sd_listen_fds.3 \
man/sd_is_fifo.3 \

4
fixme
View File

@ -96,6 +96,10 @@
* readahead() vs. fadvise() vs. ioprio
* unneeded
* properly handle multiple inotify events per read() in path.c and util.c
External:
* place /etc/inittab with explaining blurb.

178
man/sd_readahead.xml Normal file
View File

@ -0,0 +1,178 @@
<?xml version='1.0'?> <!--*-nxml-*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!--
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
-->
<refentry id="sd_readahead">
<refentryinfo>
<title>sd_readahead</title>
<productname>systemd</productname>
<authorgroup>
<author>
<contrib>Developer</contrib>
<firstname>Lennart</firstname>
<surname>Poettering</surname>
<email>lennart@poettering.net</email>
</author>
</authorgroup>
</refentryinfo>
<refmeta>
<refentrytitle>sd_readahead</refentrytitle>
<manvolnum>3</manvolnum>
</refmeta>
<refnamediv>
<refname>sd_readahead</refname>
<refpurpose>Control ongoing disk read-ahead operations</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include "sd-daemon.h"</funcsynopsisinfo>
<funcprototype>
<funcdef>int <function>sd_readahead</function></funcdef>
<paramdef>const char *<parameter>action</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><function>sd_readahead()</function> may be
called by programs involved with early boot-up to
control ongoing disk read-ahead operations. It may be
used to terminate read-ahead operations in case an
uncommon disk access pattern is to be expected and
hence read-ahead replay or collection is unlikely to
have the desired speed-up effect on the current or
future boot-ups.</para>
<para>The <parameter>action</parameter> should be one
of the following strings:</para>
<variablelist>
<varlistentry>
<term>cancel</term>
<listitem><para>Terminates read-ahead
data collection, and drops all
read-ahead data collected during this
boot-up.</para></listitem>
</varlistentry>
<varlistentry>
<term>done</term>
<listitem><para>Terminates read-ahead
data collection, but keeps all
read-ahead data collected during this
boot-up around for use during
subsequent boot-ups.</para></listitem>
</varlistentry>
<varlistentry>
<term>noreplay</term>
<listitem><para>Terminates read-ahead
replay.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Return Value</title>
<para>On success, this call returns 0. On failure, it
returns a negative errno-style error code. It is
generally recommended to ignore the return value of
this call.</para>
</refsect1>
<refsect1>
<title>Notes</title>
<para>This function is provided by the reference
implementation of APIs for new-style daemons and
distributed with the systemd package. The algorithm
it implements is simple, and can easily be
reimplemented in daemons if it is important to support
this interface without using the reference
implementation.</para>
<para>Internally, this function creates a file in
<filename>/dev/.systemd/readahead/</filename> which is
then used as flag file to notify the read-ahead
subsystem.</para>
<para>For details about the algorithm check the
liberally licensed reference implementation sources:
<ulink url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.c"/>
resp. <ulink
url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.h"/></para>
<para><function>sd_readahead()</function> is
implemented in the reference implementation's drop-in
<filename>sd-daemon.c</filename> and
<filename>sd-daemon.h</filename> files. It is
recommended that applications consuming this API copy
the implementation into their source tree. For more
details about the reference implementation see
<citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>.</para>
<para>If -DDISABLE_SYSTEMD is set during compilation
this function becomes a NOP: no flag file is created,
and 0 is returned for every valid
<parameter>action</parameter>.</para>
</refsect1>
<refsect1>
<title>Examples</title>
<example>
<title>Cancelling all read-ahead operations</title>
<para>During boots where SELinux has to
relabel the file system hierarchy, it
causes a large number of disk accesses that
do not occur during normal boots. Hence
it is a good idea to disable both read-ahead
replay and read-ahead collection.
</para>
<programlisting>sd_readahead("cancel");
sd_readahead("noreplay");</programlisting>
</example>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
<citerefentry><refentrytitle>daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View File

@ -147,6 +147,17 @@
semantics of this option see
<citerefentry><refentrytitle>sd_booted</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--readahead=</option></term>
<listitem><para>Controls disk
read-ahead operations. The argument
must be a string, and either "cancel",
"done" or "noreplay". For details
about the semantics of this option see
<citerefentry><refentrytitle>sd_readahead</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -36,16 +36,18 @@ static bool arg_ready = false;
static pid_t arg_pid = 0;
static const char *arg_status = NULL;
static bool arg_booted = false;
static const char *arg_readahead = NULL;
static int help(void) {
printf("%s [OPTIONS...] [VARIABLE=VALUE...]\n\n"
"Notify the init system about service status updates.\n\n"
" -h --help Show this help\n"
" --ready Inform the init system about service start-up completion\n"
" --pid[=PID] Set main pid of daemon\n"
" --status=TEXT Set status text\n"
" --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n",
" -h --help Show this help\n"
" --ready Inform the init system about service start-up completion\n"
" --pid[=PID] Set main pid of daemon\n"
" --status=TEXT Set status text\n"
" --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n"
" --readahead=ACTION Controls read-ahead operations\n",
program_invocation_short_name);
return 0;
@ -57,16 +59,18 @@ static int parse_argv(int argc, char *argv[]) {
ARG_READY = 0x100,
ARG_PID,
ARG_STATUS,
ARG_BOOTED
ARG_BOOTED,
ARG_READAHEAD
};
static const struct option options[] = {
{ "help", no_argument, NULL, 'h' },
{ "ready", no_argument, NULL, ARG_READY },
{ "pid", optional_argument, NULL, ARG_PID },
{ "status", required_argument, NULL, ARG_STATUS },
{ "booted", no_argument, NULL, ARG_BOOTED },
{ NULL, 0, NULL, 0 }
{ "help", no_argument, NULL, 'h' },
{ "ready", no_argument, NULL, ARG_READY },
{ "pid", optional_argument, NULL, ARG_PID },
{ "status", required_argument, NULL, ARG_STATUS },
{ "booted", no_argument, NULL, ARG_BOOTED },
{ "readahead", required_argument, NULL, ARG_READAHEAD },
{ NULL, 0, NULL, 0 }
};
int c;
@ -106,6 +110,10 @@ static int parse_argv(int argc, char *argv[]) {
arg_booted = true;
break;
case ARG_READAHEAD:
arg_readahead = optarg;
break;
case '?':
return -EINVAL;
@ -119,7 +127,8 @@ static int parse_argv(int argc, char *argv[]) {
!arg_ready &&
!arg_status &&
!arg_pid &&
!arg_booted) {
!arg_booted &&
!arg_readahead) {
help();
return -EINVAL;
}
@ -144,6 +153,13 @@ int main(int argc, char* argv[]) {
if (arg_booted)
return sd_booted() <= 0;
if (arg_readahead) {
if ((r = sd_readahead(arg_readahead)) < 0) {
log_error("Failed to issue read-ahead control command: %s", strerror(-r));
goto finish;
}
}
if (arg_ready)
our_env[i++] = (char*) "READY=1";

View File

@ -41,6 +41,7 @@
#include <sys/ioctl.h>
#include <sys/vfs.h>
#include <getopt.h>
#include <sys/inotify.h>
#include "missing.h"
#include "util.h"
@ -56,6 +57,7 @@
* - sd_readahead_cancel
* - gzip?
* - remount rw?
* - handle files where nothing is in mincore
* - does ioprio_set work with fadvise()?
*/
@ -199,12 +201,13 @@ static int qsort_compare(const void *a, const void *b) {
static int collect(const char *root) {
enum {
FD_FANOTIFY,
FD_FANOTIFY, /* Get the actual fs events */
FD_SIGNAL,
FD_INOTIFY, /* We get notifications to quit early via this fd */
_FD_MAX
};
struct pollfd pollfd[_FD_MAX];
int fanotify_fd = -1, signal_fd = -1, r = 0;
int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0;
pid_t my_pid;
Hashmap *files = NULL;
Iterator i;
@ -251,6 +254,11 @@ static int collect(const char *root) {
goto finish;
}
if ((inotify_fd = open_inotify()) < 0) {
r = inotify_fd;
goto finish;
}
not_after = now(CLOCK_MONOTONIC) + arg_timeout;
my_pid = getpid();
@ -260,6 +268,8 @@ static int collect(const char *root) {
pollfd[FD_FANOTIFY].events = POLLIN;
pollfd[FD_SIGNAL].fd = signal_fd;
pollfd[FD_SIGNAL].events = POLLIN;
pollfd[FD_INOTIFY].fd = inotify_fd;
pollfd[FD_INOTIFY].events = POLLIN;
sd_notify(0,
"READY=1\n"
@ -267,6 +277,17 @@ static int collect(const char *root) {
log_debug("Collecting...");
if (access("/dev/.systemd/readahead/cancel", F_OK) >= 0) {
log_debug("Collection canceled");
r = -ECANCELED;
goto finish;
}
if (access("/dev/.systemd/readahead/done", F_OK) >= 0) {
log_debug("Got termination request");
goto done;
}
for (;;) {
union {
struct fanotify_event_metadata metadata;
@ -298,14 +319,52 @@ static int collect(const char *root) {
goto finish;
}
if (pollfd[FD_SIGNAL].revents != 0)
break;
if (h == 0) {
log_debug("Reached maximum collection time, ending collection.");
break;
}
if (pollfd[FD_SIGNAL].revents) {
log_debug("Got signal.");
break;
}
if (pollfd[FD_INOTIFY].revents) {
uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
struct inotify_event *e;
if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
if (errno == EINTR || errno == EAGAIN)
continue;
log_error("Failed to read inotify event: %m");
r = -errno;
goto finish;
}
e = (struct inotify_event*) inotify_buffer;
while (n > 0) {
size_t step;
if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) {
log_debug("Collection canceled");
r = -ECANCELED;
goto finish;
}
if ((e->mask & IN_CREATE) && streq(e->name, "done")) {
log_debug("Got termination request");
goto done;
}
step = sizeof(struct inotify_event) + e->len;
assert(step <= (size_t) n);
e = (struct inotify_event*) ((uint8_t*) e + step);
n -= step;
}
}
if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) {
if (errno == EINTR || errno == EAGAIN)
@ -352,6 +411,7 @@ static int collect(const char *root) {
}
}
done:
if (fanotify_fd >= 0) {
close_nointr_nofail(fanotify_fd);
fanotify_fd = -1;
@ -451,6 +511,9 @@ finish:
if (signal_fd >= 0)
close_nointr_nofail(signal_fd);
if (inotify_fd >= 0)
close_nointr_nofail(inotify_fd);
if (pack) {
fclose(pack);
unlink(pack_fn_new);

View File

@ -24,6 +24,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/sysinfo.h>
#include <sys/inotify.h>
#include "log.h"
#include "readahead-common.h"
@ -116,3 +117,23 @@ bool enough_ram(void) {
* with at least 128MB
* memory */
}
/* Creates a non-blocking, close-on-exec inotify handle and registers a
 * watch for file creation (IN_CREATE) in /dev/.systemd/readahead, the
 * directory in which the read-ahead flag files are placed.
 *
 * Returns the inotify file descriptor on success, or a negative
 * errno-style error code on failure. The caller owns the returned
 * descriptor and has to close it. */
int open_inotify(void) {
        int fd, saved_errno;

        if ((fd = inotify_init1(IN_CLOEXEC|IN_NONBLOCK)) < 0) {
                /* Remember the error before logging, since the
                 * logging call may overwrite errno. */
                saved_errno = errno;
                log_error("Failed to create inotify handle: %m");
                return -saved_errno;
        }

        /* Best effort: make sure the watched directory exists. Failures
         * (such as EEXIST) are deliberately ignored here; a real problem
         * surfaces when the watch is added below. */
        mkdir("/dev/.systemd", 0755);
        mkdir("/dev/.systemd/readahead", 0755);

        if (inotify_add_watch(fd, "/dev/.systemd/readahead", IN_CREATE) < 0) {
                /* Both the logging call and the close below may clobber
                 * errno, hence save it first. */
                saved_errno = errno;
                log_error("Failed to watch /dev/.systemd/readahead: %m");
                close_nointr_nofail(fd);
                return -saved_errno;
        }

        return fd;
}

View File

@ -32,4 +32,6 @@ int fs_on_ssd(const char *p);
bool enough_ram(void);
int open_inotify(void);
#endif

View File

@ -33,6 +33,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/inotify.h>
#include "missing.h"
#include "util.h"
@ -119,6 +120,7 @@ static int replay(const char *root) {
char *pack_fn = NULL, c;
bool on_ssd, ready = false;
int prio;
int inotify_fd = -1;
assert(root);
@ -141,6 +143,11 @@ static int replay(const char *root) {
goto finish;
}
if ((inotify_fd = open_inotify()) < 0) {
r = inotify_fd;
goto finish;
}
if (!(fgets(line, sizeof(line), pack))) {
log_error("Premature end of pack file.");
r = -EIO;
@ -177,8 +184,40 @@ static int replay(const char *root) {
log_debug("Replaying...");
if (access("/dev/.systemd/readahead/noreplay", F_OK) >= 0) {
log_debug("Got termination request");
goto done;
}
while (!feof(pack) && !ferror(pack)) {
uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
int k;
ssize_t n;
if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
if (errno != EINTR && errno != EAGAIN) {
log_error("Failed to read inotify event: %m");
r = -errno;
goto finish;
}
} else {
struct inotify_event *e = (struct inotify_event*) inotify_buffer;
while (n > 0) {
size_t step;
if ((e->mask & IN_CREATE) && streq(e->name, "noreplay")) {
log_debug("Got termination request");
goto done;
}
step = sizeof(struct inotify_event) + e->len;
assert(step <= (size_t) n);
e = (struct inotify_event*) ((uint8_t*) e + step);
n -= step;
}
}
if ((k = unpack_file(pack)) < 0) {
r = k;
@ -193,6 +232,7 @@ static int replay(const char *root) {
}
}
done:
if (!ready)
sd_notify(0, "READY=1");
@ -208,6 +248,9 @@ finish:
if (pack)
fclose(pack);
if (inotify_fd >= 0)
close_nointr_nofail(inotify_fd);
free(pack_fn);
return r;

View File

@ -433,3 +433,41 @@ int sd_booted(void) {
return a.st_dev != b.st_dev;
#endif
}
/* Creates the file at the given path if it does not exist yet, after
 * making sure (best effort) that /dev/.systemd/readahead exists. The
 * file content and timestamps are not touched otherwise.
 *
 * Returns 0 on success, or a negative errno-style error code. If
 * DISABLE_SYSTEMD is defined, or when not building for Linux, this is
 * a NOP that returns 0. */
static int touch(const char *path) {

#if !defined(DISABLE_SYSTEMD) && defined(__linux__)
        int fd;

        /* Best effort, errors such as EEXIST are ignored on purpose. If
         * the directory is really missing, open() below reports it. */
        mkdir("/dev/.systemd", 0755);
        mkdir("/dev/.systemd/readahead", 0755);

        if ((fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0666)) < 0)
                return -errno;

        /* errno values are positive, so EINTR has to be compared
         * unnegated. On Linux the descriptor is released even when
         * close() fails with EINTR, hence we must not retry (we might
         * close an unrelated descriptor or get EBADF); the file has
         * been created at this point, so treat EINTR as success. */
        if (close(fd) < 0 && errno != EINTR)
                return -errno;

#endif
        return 0;
}
/* Maps a read-ahead control action ("cancel", "done" or "noreplay")
 * to its flag file below /dev/.systemd/readahead/ and creates that
 * file via touch().
 *
 * Returns the result of touch() for a known action, or -EINVAL if
 * action is NULL or not one of the known strings. */
int sd_readahead(const char *action) {
        static const struct {
                const char *name;
                const char *flag_file;
        } actions[] = {
                { "cancel",   "/dev/.systemd/readahead/cancel"   },
                { "done",     "/dev/.systemd/readahead/done"     },
                { "noreplay", "/dev/.systemd/readahead/noreplay" },
        };
        size_t idx;

        if (action == NULL)
                return -EINVAL;

        /* Linear lookup, first (and only possible) match wins. */
        for (idx = 0; idx < sizeof(actions) / sizeof(actions[0]); idx++) {
                if (strcmp(action, actions[idx].name) != 0)
                        continue;

                return touch(actions[idx].flag_file);
        }

        return -EINVAL;
}

View File

@ -254,6 +254,16 @@ int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_attr_(
*/
int sd_booted(void) _sd_hidden_;
/*
Controls ongoing disk read-ahead operations during boot-up. The argument
must be a string, and either "cancel", "done" or "noreplay".
cancel = terminate read-ahead data collection, drop collected information
done = terminate read-ahead data collection, keep collected information
noreplay = terminate read-ahead replay

The request is signalled by creating a flag file named after the action
in /dev/.systemd/readahead/.

Returns 0 on success, or a negative errno-style error code on failure.
-EINVAL is returned if the argument is NULL or not one of the strings
listed above.
*/
int sd_readahead(const char *action);
#ifdef __cplusplus
}
#endif