From 6624768c9c39ab409edebe07cb06ecd93cc6f3ed Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 26 Sep 2010 15:50:14 +0200 Subject: [PATCH] readahead: add interface to sd-daemon.[ch] to control readahead --- Makefile.am | 1 + fixme | 4 + man/sd_readahead.xml | 178 ++++++++++++++++++++++++++++++++++++++++ man/systemd-notify.xml | 11 +++ src/notify.c | 42 +++++++--- src/readahead-collect.c | 73 ++++++++++++++-- src/readahead-common.c | 21 +++++ src/readahead-common.h | 2 + src/readahead-replay.c | 43 ++++++++++ src/sd-daemon.c | 38 +++++++++ src/sd-daemon.h | 10 +++ 11 files changed, 405 insertions(+), 18 deletions(-) create mode 100644 man/sd_readahead.xml diff --git a/Makefile.am b/Makefile.am index 2cd3debf83..70a6c190f9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -421,6 +421,7 @@ MANPAGES = \ man/systemd-cgls.1 \ man/systemd-notify.1 \ man/sd_notify.3 \ + man/sd_readahead.3 \ man/sd_booted.3 \ man/sd_listen_fds.3 \ man/sd_is_fifo.3 \ diff --git a/fixme b/fixme index 16aabdaa1c..7e025c429a 100644 --- a/fixme +++ b/fixme @@ -96,6 +96,10 @@ * readahead() vs. fadvise() vs. ioprio +* unneeded + +* properly handle multiple inotify events per read() in path.c and util.c + External: * place /etc/inittab with explaining blurb. diff --git a/man/sd_readahead.xml b/man/sd_readahead.xml new file mode 100644 index 0000000000..178f907afa --- /dev/null +++ b/man/sd_readahead.xml @@ -0,0 +1,178 @@ + + + + + + + + + sd_readahead + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_readahead + 3 + + + + sd_readahead + Control ongoing disk read-ahead operations + + + + + #include "sd-daemon.h" + + + int sd_readahead + const char *action + + + + + + Description + sd_readahead() may be + called by programs involved with early boot-up to + control ongoing disk read-ahead operations. It may be + used to terminate read-ahead operations in case an + uncommon disk access pattern is to be expected and + hence read-ahead replay or collection is unlikely to + have the desired speed-up effect on the current or + future boot-ups. + + The action should be one + of the following strings: + + + + cancel + + Terminates read-ahead + data collection, and drops all + read-ahead data collected during this + boot-up. + + + + done + + Terminates read-ahead + data collection, but keeps all + read-ahead data collected during this + boot-up around for use during + subsequent boot-ups. + + + + noreplay + + Terminates read-ahead + replay. + + + + + + + + Return Value + + On failure, these calls return a negative + errno-style error code. It is generally recommended to + ignore the return value of this call. + + + + Notes + + This function is provided by the reference + implementation of APIs for new-style daemons and + distributed with the systemd package. The algorithm + it implements is simple, and can easily be + reimplemented in daemons if it is important to support + this interface without using the reference + implementation. + + Internally, this function creates a file in + /dev/.systemd/readahead/ which is + then used as flag file to notify the read-ahead + subsystem. + + For details about the algorithm check the + liberally licensed reference implementation sources: + + resp. + + sd_readahead() is + implemented in the reference implementation's drop-in + sd-daemon.c and + sd-daemon.h files. It is + recommended that applications consuming this API copy + the implementation into their source tree. For more + details about the reference implementation see + sd_daemon7 + + If -DDISABLE_SYSTEMD is set during compilation + this function will always return 0 and otherwise + become a NOP. + + + + Examples + + + Cancelling all read-ahead operations + + During boots where SELinux has to + relabel the file system hierarchy, it will + create a large amount of disk accesses that + are not necessary during normal boots. Hence + it is a good idea to disable both read-ahead replay and read-ahead collection. + + + sd_readahead("cancel"); +sd_readahead("noreplay"); + + + + + + See Also + + systemd1, + sd_daemon7, + daemon7 + + + + diff --git a/man/systemd-notify.xml b/man/systemd-notify.xml index 5286418cc0..59d6b2fd87 100644 --- a/man/systemd-notify.xml +++ b/man/systemd-notify.xml @@ -147,6 +147,17 @@ semantics of this option see sd_booted3. + + + + + Controls disk + read-ahead operations. The argument + must be a string, and either "cancel", + "done" or "noreplay". For details + about the semantics of this option see + sd_readahead3. + diff --git a/src/notify.c b/src/notify.c index 28cfe23b23..61c4b0f92f 100644 --- a/src/notify.c +++ b/src/notify.c @@ -36,16 +36,18 @@ static bool arg_ready = false; static pid_t arg_pid = 0; static const char *arg_status = NULL; static bool arg_booted = false; +static const char *arg_readahead = NULL; static int help(void) { printf("%s [OPTIONS...] [VARIABLE=VALUE...]\n\n" "Notify the init system about service status updates.\n\n" - " -h --help Show this help\n" - " --ready Inform the init system about service start-up completion\n" - " --pid[=PID] Set main pid of daemon\n" - " --status=TEXT Set status text\n" - " --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n", + " -h --help Show this help\n" + " --ready Inform the init system about service start-up completion\n" + " --pid[=PID] Set main pid of daemon\n" + " --status=TEXT Set status text\n" + " --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n" + " --readahead=ACTION Controls read-ahead operations\n", program_invocation_short_name); return 0; @@ -57,16 +59,18 @@ static int parse_argv(int argc, char *argv[]) { ARG_READY = 0x100, ARG_PID, ARG_STATUS, - ARG_BOOTED + ARG_BOOTED, + ARG_READAHEAD }; static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "ready", no_argument, NULL, ARG_READY }, - { "pid", optional_argument, NULL, ARG_PID }, - { "status", required_argument, NULL, ARG_STATUS }, - { "booted", no_argument, NULL, ARG_BOOTED }, - { NULL, 0, NULL, 0 } + { "help", no_argument, NULL, 'h' }, + { "ready", no_argument, NULL, ARG_READY }, + { "pid", optional_argument, NULL, ARG_PID }, + { "status", required_argument, NULL, ARG_STATUS }, + { "booted", no_argument, NULL, ARG_BOOTED }, + { "readahead", required_argument, NULL, ARG_READAHEAD }, + { NULL, 0, NULL, 0 } }; int c; @@ -106,6 +110,10 @@ static int parse_argv(int argc, char *argv[]) { arg_booted = true; break; + case ARG_READAHEAD: + arg_readahead = optarg; + break; + case '?': return -EINVAL; @@ -119,7 +127,8 @@ static int parse_argv(int argc, char *argv[]) { !arg_ready && !arg_status && !arg_pid && - !arg_booted) { + !arg_booted && + !arg_readahead) { help(); return -EINVAL; } @@ -144,6 +153,13 @@ int main(int argc, char* argv[]) { if (arg_booted) return sd_booted() <= 0; + if (arg_readahead) { + if ((r = sd_readahead(arg_readahead)) < 0) { + log_error("Failed to issue read-ahead control command: %s", strerror(-r)); + goto finish; + } + } + if (arg_ready) our_env[i++] = (char*) "READY=1"; diff --git a/src/readahead-collect.c b/src/readahead-collect.c index 817b958f61..aa136ce50e 100644 --- a/src/readahead-collect.c +++ b/src/readahead-collect.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "missing.h" #include "util.h" @@ -56,6 +57,7 @@ * - sd_readahead_cancel * - gzip? * - remount rw? + * - handle files where nothing is in mincore * - does ioprio_set work with fadvise()? */ @@ -199,12 +201,13 @@ static int qsort_compare(const void *a, const void *b) { static int collect(const char *root) { enum { - FD_FANOTIFY, + FD_FANOTIFY, /* Get the actualy fs events */ FD_SIGNAL, + FD_INOTIFY, /* We get notifications to quit early via this fd */ _FD_MAX }; struct pollfd pollfd[_FD_MAX]; - int fanotify_fd = -1, signal_fd = -1, r = 0; + int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0; pid_t my_pid; Hashmap *files = NULL; Iterator i; @@ -251,6 +254,11 @@ static int collect(const char *root) { goto finish; } + if ((inotify_fd = open_inotify()) < 0) { + r = inotify_fd; + goto finish; + } + not_after = now(CLOCK_MONOTONIC) + arg_timeout; my_pid = getpid(); @@ -260,6 +268,8 @@ static int collect(const char *root) { pollfd[FD_FANOTIFY].events = POLLIN; pollfd[FD_SIGNAL].fd = signal_fd; pollfd[FD_SIGNAL].events = POLLIN; + pollfd[FD_INOTIFY].fd = inotify_fd; + pollfd[FD_INOTIFY].events = POLLIN; sd_notify(0, "READY=1\n" @@ -267,6 +277,17 @@ static int collect(const char *root) { log_debug("Collecting..."); + if (access("/dev/.systemd/readahead/cancel", F_OK) >= 0) { + log_debug("Collection canceled"); + r = -ECANCELED; + goto finish; + } + + if (access("/dev/.systemd/readahead/done", F_OK) >= 0) { + log_debug("Got termination request"); + goto done; + } + for (;;) { union { struct fanotify_event_metadata metadata; @@ -298,14 +319,52 @@ static int collect(const char *root) { goto finish; } - if (pollfd[FD_SIGNAL].revents != 0) - break; - if (h == 0) { log_debug("Reached maximum collection time, ending collection."); break; } + if (pollfd[FD_SIGNAL].revents) { + log_debug("Got signal."); + break; + } + + if (pollfd[FD_INOTIFY].revents) { + uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX]; + struct inotify_event *e; + + if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + + log_error("Failed to read inotify event: %m"); + r = -errno; + goto finish; + } + + e = (struct inotify_event*) inotify_buffer; + while (n > 0) { + size_t step; + + if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) { + log_debug("Collection canceled"); + r = -ECANCELED; + goto finish; + } + + if ((e->mask & IN_CREATE) && streq(e->name, "done")) { + log_debug("Got termination request"); + goto done; + } + + step = sizeof(struct inotify_event) + e->len; + assert(step <= (size_t) n); + + e = (struct inotify_event*) ((uint8_t*) e + step); + n -= step; + } + } + if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) { if (errno == EINTR || errno == EAGAIN) @@ -352,6 +411,7 @@ static int collect(const char *root) { } } +done: if (fanotify_fd >= 0) { close_nointr_nofail(fanotify_fd); fanotify_fd = -1; @@ -451,6 +511,9 @@ finish: if (signal_fd >= 0) close_nointr_nofail(signal_fd); + if (inotify_fd >= 0) + close_nointr_nofail(inotify_fd); + if (pack) { fclose(pack); unlink(pack_fn_new); diff --git a/src/readahead-common.c b/src/readahead-common.c index a1016a3ede..a2f6f173e9 100644 --- a/src/readahead-common.c +++ b/src/readahead-common.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "log.h" #include "readahead-common.h" @@ -116,3 +117,23 @@ bool enough_ram(void) { * with at least 128MB * memory */ } + +int open_inotify(void) { + int fd; + + if ((fd = inotify_init1(IN_CLOEXEC|IN_NONBLOCK)) < 0) { + log_error("Failed to create inotify handle: %m"); + return -errno; + } + + mkdir("/dev/.systemd", 0755); + mkdir("/dev/.systemd/readahead", 0755); + + if (inotify_add_watch(fd, "/dev/.systemd/readahead", IN_CREATE) < 0) { + log_error("Failed to watch /dev/.systemd/readahead: %m"); + close_nointr_nofail(fd); + return -errno; + } + + return fd; +} diff --git a/src/readahead-common.h b/src/readahead-common.h index c7fd713e13..3f64f290a9 100644 --- a/src/readahead-common.h +++ b/src/readahead-common.h @@ -32,4 +32,6 @@ int fs_on_ssd(const char *p); bool enough_ram(void); +int open_inotify(void); + #endif diff --git a/src/readahead-replay.c b/src/readahead-replay.c index d4ddf26aac..a5a2936b3e 100644 --- a/src/readahead-replay.c +++ b/src/readahead-replay.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "missing.h" #include "util.h" @@ -119,6 +120,7 @@ static int replay(const char *root) { char *pack_fn = NULL, c; bool on_ssd, ready = false; int prio; + int inotify_fd = -1; assert(root); @@ -141,6 +143,11 @@ static int replay(const char *root) { goto finish; } + if ((inotify_fd = open_inotify()) < 0) { + r = inotify_fd; + goto finish; + } + if (!(fgets(line, sizeof(line), pack))) { log_error("Premature end of pack file."); r = -EIO; @@ -177,8 +184,40 @@ static int replay(const char *root) { log_debug("Replaying..."); + if (access("/dev/.systemd/readahead/noreplay", F_OK) >= 0) { + log_debug("Got termination request"); + goto done; + } + while (!feof(pack) && !ferror(pack)) { + uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX]; int k; + ssize_t n; + + if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) { + if (errno != EINTR && errno != EAGAIN) { + log_error("Failed to read inotify event: %m"); + r = -errno; + goto finish; + } + } else { + struct inotify_event *e = (struct inotify_event*) inotify_buffer; + + while (n > 0) { + size_t step; + + if ((e->mask & IN_CREATE) && streq(e->name, "noreplay")) { + log_debug("Got termination request"); + goto done; + } + + step = sizeof(struct inotify_event) + e->len; + assert(step <= (size_t) n); + + e = (struct inotify_event*) ((uint8_t*) e + step); + n -= step; + } + } if ((k = unpack_file(pack)) < 0) { r = k; @@ -193,6 +232,7 @@ static int replay(const char *root) { } } +done: if (!ready) sd_notify(0, "READY=1"); @@ -208,6 +248,9 @@ finish: if (pack) fclose(pack); + if (inotify_fd >= 0) + close_nointr_nofail(inotify_fd); + free(pack_fn); return r; diff --git a/src/sd-daemon.c b/src/sd-daemon.c index 9c23b917f9..316fccc50a 100644 --- a/src/sd-daemon.c +++ b/src/sd-daemon.c @@ -433,3 +433,41 @@ int sd_booted(void) { return a.st_dev != b.st_dev; #endif } + +static int touch(const char *path) { + +#if !defined(DISABLE_SYSTEMD) && defined(__linux__) + int fd; + + mkdir("/dev/.systemd", 0755); + mkdir("/dev/.systemd/readahead", 0755); + + if ((fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0666)) < 0) + return -errno; + + for (;;) { + if (close(fd) >= 0) + break; + + if (errno != -EINTR) + return -errno; + } + +#endif + return 0; +} + +int sd_readahead(const char *action) { + + if (!action) + return -EINVAL; + + if (strcmp(action, "cancel") == 0) + return touch("/dev/.systemd/readahead/cancel"); + else if (strcmp(action, "done") == 0) + return touch("/dev/.systemd/readahead/done"); + else if (strcmp(action, "noreplay") == 0) + return touch("/dev/.systemd/readahead/noreplay"); + + return -EINVAL; +} diff --git a/src/sd-daemon.h b/src/sd-daemon.h index 008a44c1d2..2fbfe955ce 100644 --- a/src/sd-daemon.h +++ b/src/sd-daemon.h @@ -254,6 +254,16 @@ int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_attr_( */ int sd_booted(void) _sd_hidden_; +/* + Controls ongoing disk read-ahead operations during boot-up. The argument + must be a string, and either "cancel", "done" or "noreplay". + + cancel = terminate read-ahead data collection, drop collected information + done = terminate read-ahead data collection, keep collected information + noreplay = terminate read-ahead replay +*/ +int sd_readahead(const char *action); + #ifdef __cplusplus } #endif