core: add support for a configurable system-wide start-up timeout

When this system-wide start-up timeout is hit we execute one of the
failure actions already implemented for services that fail.

This should not only be useful on embedded devices, but also on laptops
which have the power-button reachable when the lid is closed. These
devices, when in a backpack, might get powered on by accident due to the
easily reachable power button. We want to make sure that the system
turns itself off after a while if it was started up this way.

When the system manages to fully start up, logind will suspend the
machine by default if the lid is closed. However, in some cases we don't
even get as far as logind, and the boot hangs much earlier, for example
because we ask for a LUKS password that nobody ever enters.

Yeah, this is a real-life problem on my Yoga 13, which has one of those
easily accessible power buttons, even if the device is closed.
This commit is contained in:
Lennart Poettering 2014-08-22 16:36:38 +02:00
parent 2de1851fe3
commit 2928b0a863
14 changed files with 275 additions and 104 deletions

View File

@ -1112,7 +1112,9 @@ libsystemd_core_la_SOURCES = \
src/core/audit-fd.c \
src/core/audit-fd.h \
src/core/show-status.c \
src/core/show-status.h
src/core/show-status.h \
src/core/failure-action.c \
src/core/failure-action.h
if HAVE_KMOD
libsystemd_core_la_SOURCES += \

View File

@ -254,7 +254,6 @@
signal.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>TimerSlackNSec=</varname></term>
@ -280,6 +279,32 @@
too.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>StartTimeoutSec=</varname></term>
<term><varname>StartTimeoutAction=</varname></term>
<term><varname>StartTimeoutRebootArgument=</varname></term>
<listitem><para>Configures an over-all
system start-up timeout and controls
what to do when the timeout is
reached. <varname>StartTimeoutSec=</varname>
specifies the timeout, and defaults to
<literal>15min</literal>. <varname>StartTimeoutAction=</varname>
configures the action to take when the
system did not finish boot-up within
the specified time. It takes the same
values as the per-service
<varname>StartLimitAction=</varname>
setting, see
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details. Defaults to
<option>reboot-force</option>. <varname>StartTimeoutRebootArgument=</varname>
configures an optional reboot string
to pass to the
<citerefentry><refentrytitle>reboot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system call.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>DefaultTimerAccuracySec=</varname></term>

View File

@ -1154,19 +1154,6 @@ ExecStart=/bin/echo $ONE $TWO ${TWO}</programlisting>
<option>none</option>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RebootArgument=</varname></term>
<listitem><para>Configure the optional
argument for the
<citerefentry><refentrytitle>reboot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system call if
<varname>StartLimitAction=</varname>
is a reboot action. This works just
like the optional argument to
<command>systemctl reboot</command>
command.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>FailureAction=</varname></term>
<listitem><para>Configure the action
@ -1178,6 +1165,20 @@ ExecStart=/bin/echo $ONE $TWO ${TWO}</programlisting>
</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RebootArgument=</varname></term>
<listitem><para>Configure the optional
argument for the
<citerefentry><refentrytitle>reboot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system call if
<varname>StartLimitAction=</varname>
or <varname>FailureAction=</varname>
is a reboot action. This works just
like the optional argument to
<command>systemctl reboot</command>
command.</para></listitem>
</varlistentry>
</variablelist>
<para>Check

94
src/core/failure-action.c Normal file
View File

@ -0,0 +1,94 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
Copyright 2012 Michael Olbrich
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/reboot.h>
#include <linux/reboot.h>
#include <sys/syscall.h>
#include "bus-util.h"
#include "bus-error.h"
#include "special.h"
#include "failure-action.h"
/* Carries out the configured reaction to a failure.
 *
 *   m          - manager the action is executed on; must not be NULL
 *   action     - which reaction to perform; must be a valid FailureAction
 *   reboot_arg - optional argument for the reboot, may be NULL
 *
 * Always returns -ECANCELED, regardless of the action (including
 * FAILURE_ACTION_NONE) and regardless of whether the action itself
 * succeeded. */
int failure_action(Manager *m, FailureAction action, const char *reboot_arg) {
        int r;

        assert(m);
        assert(action >= 0);
        assert(action < _FAILURE_ACTION_MAX);

        if (action == FAILURE_ACTION_REBOOT) {
                _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;

                /* Regular reboot: enqueue a start job for the reboot
                 * target, replacing conflicting jobs. */
                log_warning("Rebooting as result of failure.");
                update_reboot_param_file(reboot_arg);

                r = manager_add_job_by_name(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE, true, &error, NULL);
                if (r < 0)
                        log_error("Failed to reboot: %s.", bus_error_message(&error, r));

        } else if (action == FAILURE_ACTION_REBOOT_FORCE) {

                /* No job is queued here; only the manager's exit code
                 * is switched to MANAGER_REBOOT. */
                log_warning("Forcibly rebooting as result of failure.");
                update_reboot_param_file(reboot_arg);
                m->exit_code = MANAGER_REBOOT;

        } else if (action == FAILURE_ACTION_REBOOT_IMMEDIATE) {

                /* Flush file systems, then reboot right from here. */
                log_warning("Rebooting immediately as result of failure.");
                sync();

                if (reboot_arg) {
                        log_info("Rebooting with argument '%s'.", reboot_arg);
                        syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, reboot_arg);
                }

                /* Reached when no argument was given, or when the
                 * syscall above returned: reboot without argument. */
                log_info("Rebooting.");
                reboot(RB_AUTOBOOT);

        } else if (action != FAILURE_ACTION_NONE)
                assert_not_reached("Unknown failure action");

        return -ECANCELED;
}
/* Configuration-file spelling of each FailureAction value, indexed by
 * the enum value itself. */
static const char* const failure_action_table[_FAILURE_ACTION_MAX] = {
[FAILURE_ACTION_NONE] = "none",
[FAILURE_ACTION_REBOOT] = "reboot",
[FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
[FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate"
};
/* Presumably expands to failure_action_to_string() and
 * failure_action_from_string(), which failure-action.h declares -
 * confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);

40
src/core/failure-action.h Normal file
View File

@ -0,0 +1,40 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
Copyright 2012 Michael Olbrich
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
typedef enum FailureAction {
FAILURE_ACTION_NONE,
FAILURE_ACTION_REBOOT,
FAILURE_ACTION_REBOOT_FORCE,
FAILURE_ACTION_REBOOT_IMMEDIATE,
_FAILURE_ACTION_MAX,
_FAILURE_ACTION_INVALID = -1
} FailureAction;
#include "macro.h"
#include "manager.h"
/* Executes the given failure action on manager m; reboot_arg is the
 * optional reboot argument and may be NULL. The implementation always
 * returns -ECANCELED. */
int failure_action(Manager *m, FailureAction action, const char *reboot_arg);
/* Conversion between FailureAction values and their string names
 * ("none", "reboot", "reboot-force", "reboot-immediate"). */
const char* failure_action_to_string(FailureAction i) _const_;
FailureAction failure_action_from_string(const char *s) _pure_;

View File

@ -116,6 +116,9 @@ static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static usec_t arg_start_timeout_usec = DEFAULT_MANAGER_START_TIMEOUT_USEC;
static FailureAction arg_start_timeout_action = FAILURE_ACTION_REBOOT_FORCE;
static char *arg_start_timeout_reboot_arg = NULL;
static void nop_handler(int sig) {}
@ -669,6 +672,9 @@ static int parse_config_file(void) {
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "StartTimeoutSec", config_parse_sec, 0, &arg_start_timeout_usec },
{ "Manager", "StartTimeoutAction", config_parse_failure_action, 0, &arg_start_timeout_action },
{ "Manager", "StartTimeoutRebootArgument",config_parse_string, 0, &arg_start_timeout_reboot_arg },
{}
};
@ -1628,6 +1634,10 @@ int main(int argc, char *argv[]) {
m->default_memory_accounting = arg_default_memory_accounting;
m->runtime_watchdog = arg_runtime_watchdog;
m->shutdown_watchdog = arg_shutdown_watchdog;
m->start_timeout_usec = arg_start_timeout_usec;
m->start_timeout_action = arg_start_timeout_action;
free_and_strdup(&m->start_timeout_reboot_arg, arg_start_timeout_reboot_arg);
m->userspace_timestamp = userspace_timestamp;
m->kernel_timestamp = kernel_timestamp;
m->initrd_timestamp = initrd_timestamp;
@ -1816,6 +1826,9 @@ finish:
set_free(arg_syscall_archs);
arg_syscall_archs = NULL;
free(arg_start_timeout_reboot_arg);
arg_start_timeout_reboot_arg = NULL;
label_finish();
if (reexecute) {

View File

@ -435,6 +435,8 @@ int manager_new(SystemdRunningAs running_as, bool test_run, Manager **_m) {
m->running_as = running_as;
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->start_timeout_usec = DEFAULT_MANAGER_START_TIMEOUT_USEC;
m->start_timeout_action = FAILURE_ACTION_REBOOT_FORCE;
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
@ -823,6 +825,9 @@ void manager_free(Manager *m) {
manager_close_idle_pipe(m);
sd_event_source_unref(m->start_timeout_event_source);
free(m->start_timeout_reboot_arg);
udev_unref(m->udev);
sd_event_unref(m->event);
@ -970,6 +975,20 @@ static int manager_distribute_fds(Manager *m, FDSet *fds) {
return 0;
}
/* Timer callback for the system-wide start-up timeout; manager_startup()
 * registers it via sd_event_add_time() with the Manager as userdata.
 * Logs the timeout and executes the configured start timeout action.
 * Always returns 0. */
static int on_start_timeout(sd_event_source *s, usec_t usec, void *userdata) {
Manager *m = userdata;
assert(s);
assert(m);
/* Drop our reference to the timer source and store what
 * sd_event_source_unref() returns (presumably NULL - confirm). */
m->start_timeout_event_source = sd_event_source_unref(m->start_timeout_event_source);
log_error("Startup timed out.");
/* The return value of failure_action() is ignored here. */
failure_action(m, m->start_timeout_action, m->start_timeout_reboot_arg);
return 0;
}
int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
int r, q;
@ -1042,6 +1061,22 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
m->send_reloading_done = true;
}
/* Possibly set up a start timeout */
if (!dual_timestamp_is_set(&m->finish_timestamp)) {
m->start_timeout_event_source = sd_event_source_unref(m->start_timeout_event_source);
if (m->start_timeout_usec) {
r = sd_event_add_time(
m->event,
&m->start_timeout_event_source,
CLOCK_MONOTONIC,
now(CLOCK_MONOTONIC) + m->start_timeout_usec, 0,
on_start_timeout, m);
if (r < 0)
log_error("Failed to add start timeout event: %s", strerror(-r));
}
}
return r;
}
@ -2462,10 +2497,8 @@ void manager_check_finished(Manager *m) {
if (hashmap_size(m->jobs) > 0) {
if (m->jobs_in_progress_event_source) {
sd_event_source_set_time(m->jobs_in_progress_event_source,
now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
}
if (m->jobs_in_progress_event_source)
sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
return;
}
@ -2487,6 +2520,8 @@ void manager_check_finished(Manager *m) {
dual_timestamp_get(&m->finish_timestamp);
m->start_timeout_event_source = sd_event_source_unref(m->start_timeout_event_source);
if (m->running_as == SYSTEMD_SYSTEM && detect_container(NULL) <= 0) {
/* Note that m->kernel_usec.monotonic is always at 0,

View File

@ -33,6 +33,8 @@
/* Enforce upper limit how many names we allow */
#define MANAGER_MAX_NAMES 131072 /* 128K */
#define DEFAULT_MANAGER_START_TIMEOUT_USEC (15*USEC_PER_MINUTE)
typedef struct Manager Manager;
typedef enum ManagerState {
@ -69,6 +71,7 @@ typedef enum ManagerExitCode {
#include "unit-name.h"
#include "exit-status.h"
#include "show-status.h"
#include "failure-action.h"
struct Manager {
/* Note that the set of units we know of is allowed to be
@ -152,6 +155,7 @@ struct Manager {
dual_timestamp initrd_timestamp;
dual_timestamp userspace_timestamp;
dual_timestamp finish_timestamp;
dual_timestamp security_start_timestamp;
dual_timestamp security_finish_timestamp;
dual_timestamp generators_start_timestamp;
@ -279,6 +283,12 @@ struct Manager {
/* Used for processing polkit authorization responses */
Hashmap *polkit_registry;
/* System wide startup timeouts */
usec_t start_timeout_usec;
sd_event_source *start_timeout_event_source;
FailureAction start_timeout_action;
char *start_timeout_reboot_arg;
};
int manager_new(SystemdRunningAs running_as, bool test_run, Manager **m);

View File

@ -23,9 +23,6 @@
#include <signal.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/reboot.h>
#include <linux/reboot.h>
#include <sys/syscall.h>
#include "async.h"
#include "manager.h"
@ -1052,8 +1049,6 @@ static int cgroup_good(Service *s) {
return !r;
}
static int service_execute_action(Service *s, FailureAction action, const char *reason, bool log_action_none);
static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
int r;
assert(s);
@ -1063,8 +1058,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD);
if (s->result != SERVICE_SUCCESS)
service_execute_action(s, s->failure_action, "failed", false);
if (s->result != SERVICE_SUCCESS) {
log_warning_unit(UNIT(s)->id, "%s failed.", UNIT(s)->id);
failure_action(UNIT(s)->manager, s->failure_action, s->reboot_arg);
}
if (allow_restart &&
!s->forbid_restart &&
@ -1601,67 +1598,15 @@ fail:
service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
}
static int service_execute_action(Service *s, FailureAction action, const char *reason, bool log_action_none) {
assert(s);
if (action == SERVICE_FAILURE_ACTION_REBOOT ||
action == SERVICE_FAILURE_ACTION_REBOOT_FORCE)
update_reboot_param_file(s->reboot_arg);
switch (action) {
case SERVICE_FAILURE_ACTION_NONE:
if (log_action_none)
log_warning_unit(UNIT(s)->id, "%s %s, refusing to start.", UNIT(s)->id, reason);
break;
case SERVICE_FAILURE_ACTION_REBOOT: {
_cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
int r;
log_warning_unit(UNIT(s)->id, "%s %s, rebooting.", UNIT(s)->id, reason);
r = manager_add_job_by_name(UNIT(s)->manager, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE, true, &error, NULL);
if (r < 0)
log_error_unit(UNIT(s)->id, "Failed to reboot: %s.", bus_error_message(&error, r));
break;
}
case SERVICE_FAILURE_ACTION_REBOOT_FORCE:
log_warning_unit(UNIT(s)->id, "%s %s, forcibly rebooting.", UNIT(s)->id, reason);
UNIT(s)->manager->exit_code = MANAGER_REBOOT;
break;
case SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE:
log_warning_unit(UNIT(s)->id, "%s %s, rebooting immediately.", UNIT(s)->id, reason);
sync();
if (s->reboot_arg) {
log_info("Rebooting with argument '%s'.", s->reboot_arg);
syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, s->reboot_arg);
}
log_info("Rebooting.");
reboot(RB_AUTOBOOT);
break;
default:
log_error_unit(UNIT(s)->id, "failure action=%i", action);
assert_not_reached("Unknown FailureAction.");
}
return -ECANCELED;
}
static int service_start_limit_test(Service *s) {
assert(s);
if (ratelimit_test(&s->start_limit))
return 0;
return service_execute_action(s, s->start_limit_action, "start request repeated too quickly", true);
log_warning_unit(UNIT(s)->id, "start request repeated too quickly for %s", UNIT(s)->id);
return failure_action(UNIT(s)->manager, s->start_limit_action, s->reboot_arg);
}
static int service_start(Unit *u) {
@ -2908,14 +2853,6 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
static const char* const failure_action_table[_SERVICE_FAILURE_ACTION_MAX] = {
[SERVICE_FAILURE_ACTION_NONE] = "none",
[SERVICE_FAILURE_ACTION_REBOOT] = "reboot",
[SERVICE_FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
[SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate"
};
DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);
const UnitVTable service_vtable = {
.object_size = sizeof(Service),
.exec_context_offset = offsetof(Service, exec_context),

View File

@ -28,6 +28,7 @@ typedef struct Service Service;
#include "ratelimit.h"
#include "kill.h"
#include "exit-status.h"
#include "failure-action.h"
typedef enum ServiceState {
SERVICE_DEAD,
@ -113,15 +114,6 @@ typedef enum ServiceResult {
_SERVICE_RESULT_INVALID = -1
} ServiceResult;
typedef enum FailureAction {
SERVICE_FAILURE_ACTION_NONE,
SERVICE_FAILURE_ACTION_REBOOT,
SERVICE_FAILURE_ACTION_REBOOT_FORCE,
SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE,
_SERVICE_FAILURE_ACTION_MAX,
_SERVICE_FAILURE_ACTION_INVALID = -1
} FailureAction;
struct Service {
Unit meta;
@ -193,10 +185,9 @@ struct Service {
char *status_text;
int status_errno;
FailureAction failure_action;
RateLimit start_limit;
FailureAction start_limit_action;
FailureAction failure_action;
char *reboot_arg;
UnitRef accept_socket;
@ -234,6 +225,3 @@ NotifyState notify_state_from_string(const char *s) _pure_;
const char* service_result_to_string(ServiceResult i) _const_;
ServiceResult service_result_from_string(const char *s) _pure_;
const char* failure_action_to_string(FailureAction i) _const_;
FailureAction failure_action_from_string(const char *s) _pure_;

View File

@ -23,6 +23,9 @@
#CapabilityBoundingSet=
#SystemCallArchitectures=
#TimerSlackNSec=
#StartTimeoutSec=15min
#StartTimeoutAction=reboot-force
#StartTimeoutRebootArgument=
#DefaultTimerAccuracySec=1min
#DefaultStandardOutput=journal
#DefaultStandardError=inherit

View File

@ -7137,3 +7137,24 @@ int unquote_many_words(const char **p, ...) {
return c;
}
/* Replaces the string *p with a newly allocated copy of s and frees
 * the string *p pointed to before. If s is NULL, *p is reset to NULL.
 *
 * Returns 0 on success, or -ENOMEM if the copy could not be allocated;
 * in that case *p is left untouched. */
int free_and_strdup(char **p, const char *s) {
        char *copy = NULL;

        assert(p);

        if (s) {
                copy = strdup(s);
                if (!copy)
                        return -ENOMEM;
        }

        /* The copy is made before the old string is released, so the
         * call stays safe even when s points into the old string. */
        free(*p);
        *p = copy;

        return 0;
}

View File

@ -978,3 +978,5 @@ int is_symlink(const char *path);
int unquote_first_word(const char **p, char **ret);
int unquote_many_words(const char **p, ...) _sentinel_;
int free_and_strdup(char **p, const char *s);

View File

@ -63,7 +63,7 @@ int main(int argc, char **argv) {
test_table(device_state, DEVICE_STATE);
test_table(exec_input, EXEC_INPUT);
test_table(exec_output, EXEC_OUTPUT);
test_table(failure_action, SERVICE_FAILURE_ACTION);
test_table(failure_action, FAILURE_ACTION);
test_table(job_mode, JOB_MODE);
test_table(job_result, JOB_RESULT);
test_table(job_state, JOB_STATE);