Merge pull request #12028 from poettering/start-limit-hit

core: some start limit checking improvements + refactoring
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-03-19 13:40:55 +01:00 committed by GitHub
commit 17b70256f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 93 additions and 74 deletions

View File

@ -70,7 +70,7 @@ sensor:modalias:acpi:INVN6500*:dmi:*svn*Acer*:*pn*AspireSW5-012*
sensor:modalias:acpi:BMA250E*:dmi:*:svnAcer:pnIconiaW1-810:*
ACCEL_MOUNT_MATRIX=1, 0, 0; 0, -1, 0; 0, 0, 1
sensor:modalias:acpi:SMO8500:*:dmi:*Acer*:pnOneS1002*
ACCEL_MOUNT_MATRIX=-1, 0, 0; 0, 1, 0; 0, 0, -1

View File

@ -796,7 +796,6 @@ fail:
static int automount_start(Unit *u) {
Automount *a = AUTOMOUNT(u);
Unit *trigger;
int r;
assert(a);
@ -807,13 +806,11 @@ static int automount_start(Unit *u) {
return -EEXIST;
}
trigger = UNIT_TRIGGER(u);
if (!trigger || trigger->load_state != UNIT_LOADED) {
log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
return -ENOENT;
}
r = unit_test_trigger_loaded(u);
if (r < 0)
return r;
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -20,7 +20,7 @@ static void log_and_status(Manager *m, bool warn, const char *message, const cha
"%s: %s", message, reason);
}
int emergency_action(
void emergency_action(
Manager *m,
EmergencyAction action,
EmergencyActionFlags options,
@ -33,11 +33,11 @@ int emergency_action(
assert(action < _EMERGENCY_ACTION_MAX);
if (action == EMERGENCY_ACTION_NONE)
return -ECANCELED;
return;
if (FLAGS_SET(options, EMERGENCY_ACTION_IS_WATCHDOG) && !m->service_watchdogs) {
log_warning("Watchdog disabled! Not acting on: %s", reason);
return -ECANCELED;
return;
}
bool warn = FLAGS_SET(options, EMERGENCY_ACTION_WARN);
@ -125,8 +125,6 @@ int emergency_action(
default:
assert_not_reached("Unknown emergency action");
}
return -ECANCELED;
}
static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {

View File

@ -24,9 +24,9 @@ typedef enum EmergencyActionFlags {
#include "macro.h"
#include "manager.h"
int emergency_action(Manager *m,
EmergencyAction action, EmergencyActionFlags options,
const char *reboot_arg, int exit_status, const char *reason);
void emergency_action(Manager *m,
EmergencyAction action, EmergencyActionFlags options,
const char *reboot_arg, int exit_status, const char *reason);
const char* emergency_action_to_string(EmergencyAction i) _const_;
EmergencyAction emergency_action_from_string(const char *s) _pure_;

View File

@ -1104,7 +1104,7 @@ static int mount_start(Unit *u) {
assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -555,19 +555,16 @@ static void path_mkdir(Path *p) {
static int path_start(Unit *u) {
Path *p = PATH(u);
Unit *trigger;
int r;
assert(p);
assert(IN_SET(p->state, PATH_DEAD, PATH_FAILED));
trigger = UNIT_TRIGGER(u);
if (!trigger || trigger->load_state != UNIT_LOADED) {
log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
return -ENOENT;
}
r = unit_test_trigger_loaded(u);
if (r < 0)
return r;
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -2364,7 +2364,7 @@ static int service_start(Unit *u) {
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
/* Make sure we don't enter a busy loop of some kind. */
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
return r;

View File

@ -2487,7 +2487,7 @@ static int socket_start(Unit *u) {
assert(IN_SET(s->state, SOCKET_DEAD, SOCKET_FAILED));
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -871,7 +871,7 @@ static int swap_start(Unit *u) {
if (UNIT(other)->job && UNIT(other)->job->state == JOB_RUNNING)
return -EAGAIN;
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -595,19 +595,16 @@ fail:
static int timer_start(Unit *u) {
Timer *t = TIMER(u);
TimerValue *v;
Unit *trigger;
int r;
assert(t);
assert(IN_SET(t->state, TIMER_DEAD, TIMER_FAILED));
trigger = UNIT_TRIGGER(u);
if (!trigger || trigger->load_state != UNIT_LOADED) {
log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
return -ENOENT;
}
r = unit_test_trigger_loaded(u);
if (r < 0)
return r;
r = unit_start_limit_test(u);
r = unit_test_start_limit(u);
if (r < 0) {
timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
return r;

View File

@ -1633,7 +1633,7 @@ static bool unit_condition_test_list(Unit *u, Condition *first, const char *(*to
return triggered != 0;
}
static bool unit_condition_test(Unit *u) {
static bool unit_test_condition(Unit *u) {
assert(u);
dual_timestamp_get(&u->condition_timestamp);
@ -1644,7 +1644,7 @@ static bool unit_condition_test(Unit *u) {
return u->condition_result;
}
static bool unit_assert_test(Unit *u) {
static bool unit_test_assert(Unit *u) {
assert(u);
dual_timestamp_get(&u->assert_timestamp);
@ -1667,7 +1667,7 @@ void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg
REENABLE_WARNING;
}
int unit_start_limit_test(Unit *u) {
int unit_test_start_limit(Unit *u) {
const char *reason;
assert(u);
@ -1682,9 +1682,11 @@ int unit_start_limit_test(Unit *u) {
reason = strjoina("unit ", u->id, " failed");
return emergency_action(u->manager, u->start_limit_action,
EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
u->reboot_arg, -1, reason);
emergency_action(u->manager, u->start_limit_action,
EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
u->reboot_arg, -1, reason);
return -ECANCELED;
}
bool unit_shall_confirm_spawn(Unit *u) {
@ -1725,27 +1727,31 @@ static bool unit_verify_deps(Unit *u) {
return true;
}
/* Errors:
* -EBADR: This unit type does not support starting.
/* Errors that aren't really errors:
* -EALREADY: Unit is already started.
* -ECOMM: Condition failed
* -EAGAIN: An operation is already in progress. Retry later.
* -ECANCELED: Too many requests for now.
*
* Errors that are real errors:
* -EBADR: This unit type does not support starting.
* -ECANCELED: Start limit hit, too many requests for now
* -EPROTO: Assert failed
* -EINVAL: Unit not loaded
* -EOPNOTSUPP: Unit type not supported
* -ENOLINK: The necessary dependencies are not fulfilled.
* -ESTALE: This unit has been started before and can't be started a second time
* -ENOENT: This is a triggering unit and unit to trigger is not loaded
*/
int unit_start(Unit *u) {
UnitActiveState state;
Unit *following;
int r;
assert(u);
/* If this is already started, then this will succeed. Note
* that this will even succeed if this unit is not startable
* by the user. This is relied on to detect when we need to
* wait for units and when waiting is finished. */
/* If this is already started, then this will succeed. Note that this will even succeed if this unit
* is not startable by the user. This is relied on to detect when we need to wait for units and when
* waiting is finished. */
state = unit_active_state(u);
if (UNIT_IS_ACTIVE_OR_RELOADING(state))
return -EALREADY;
@ -1758,35 +1764,45 @@ int unit_start(Unit *u) {
if (UNIT_VTABLE(u)->once_only && dual_timestamp_is_set(&u->inactive_enter_timestamp))
return -ESTALE;
/* If the conditions failed, don't do anything at all. If we
* already are activating this call might still be useful to
* speed up activation in case there is some hold-off time,
* but we don't want to recheck the condition in that case. */
/* If the conditions failed, don't do anything at all. If we already are activating this call might
* still be useful to speed up activation in case there is some hold-off time, but we don't want to
* recheck the condition in that case. */
if (state != UNIT_ACTIVATING &&
!unit_condition_test(u)) {
log_unit_debug(u, "Starting requested but condition failed. Not starting unit.");
return -ECOMM;
!unit_test_condition(u)) {
/* Let's also check the start limit here. Normally, the start limit is only checked by the
* .start() method of the unit type after it did some additional checks verifying everything
* is in order (so that those other checks can propagate errors properly). However, if a
* condition check doesn't hold we don't get that far but we should still ensure we are not
* called in a tight loop without a rate limit check enforced, hence do the check here. Note
* that ECOMM is generally not a reason for a job to fail, unlike most other errors here,
* hence the chance is big that any triggering unit for us will trigger us again. Note this
* condition check is a bit different from the condition check inside the per-unit .start()
* function, as this one will not change the unit's state in any way (and we shouldn't here,
* after all the condition failed). */
r = unit_test_start_limit(u);
if (r < 0)
return r;
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(ECOMM), "Starting requested but condition failed. Not starting unit.");
}
/* If the asserts failed, fail the entire job */
if (state != UNIT_ACTIVATING &&
!unit_assert_test(u)) {
log_unit_notice(u, "Starting requested but asserts failed.");
return -EPROTO;
}
!unit_test_assert(u))
return log_unit_notice_errno(u, SYNTHETIC_ERRNO(EPROTO), "Starting requested but asserts failed.");
/* Units of types that aren't supported cannot be
* started. Note that we do this test only after the condition
* checks, so that we rather return condition check errors
* (which are usually not considered a true failure) than "not
* supported" errors (which are considered a failure).
/* Units of types that aren't supported cannot be started. Note that we do this test only after the
* condition checks, so that we rather return condition check errors (which are usually not
* considered a true failure) than "not supported" errors (which are considered a failure).
*/
if (!unit_supported(u))
return -EOPNOTSUPP;
/* Let's make sure that the deps really are in order before we start this. Normally the job engine should have
* taken care of this already, but let's check this here again. After all, our dependencies might not be in
* effect anymore, due to a reload or due to a failed condition. */
/* Let's make sure that the deps really are in order before we start this. Normally the job engine
* should have taken care of this already, but let's check this here again. After all, our
* dependencies might not be in effect anymore, due to a reload or due to a failed condition. */
if (!unit_verify_deps(u))
return -ENOLINK;
@ -1801,11 +1817,9 @@ int unit_start(Unit *u) {
if (!UNIT_VTABLE(u)->start)
return -EBADR;
/* We don't suppress calls to ->start() here when we are
* already starting, to allow this request to be used as a
* "hurry up" call, for example when the unit is in some "auto
* restart" state where it waits for a holdoff timer to elapse
* before it will start again. */
/* We don't suppress calls to ->start() here when we are already starting, to allow this request to
* be used as a "hurry up" call, for example when the unit is in some "auto restart" state where it
* waits for a holdoff timer to elapse before it will start again. */
unit_add_to_dbus_queue(u);
@ -2501,10 +2515,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
if (os != UNIT_FAILED && ns == UNIT_FAILED) {
reason = strjoina("unit ", u->id, " failed");
(void) emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason);
emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason);
} else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) {
reason = strjoina("unit ", u->id, " succeeded");
(void) emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason);
emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason);
}
}
@ -5589,6 +5603,20 @@ int unit_success_action_exit_status(Unit *u) {
return r;
}
int unit_test_trigger_loaded(Unit *u) {
Unit *trigger;
/* Tests whether the unit to trigger is loaded */
trigger = UNIT_TRIGGER(u);
if (!trigger)
return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOENT), "Refusing to start, unit to trigger not loaded.");
if (trigger->load_state != UNIT_LOADED)
return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOENT), "Refusing to start, unit %s to trigger not loaded.", u->id);
return 0;
}
static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
[COLLECT_INACTIVE] = "inactive",
[COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",

View File

@ -778,7 +778,7 @@ static inline bool unit_supported(Unit *u) {
void unit_warn_if_dir_nonempty(Unit *u, const char* where);
int unit_fail_if_noncanonical(Unit *u, const char* where);
int unit_start_limit_test(Unit *u);
int unit_test_start_limit(Unit *u);
void unit_unref_uid(Unit *u, bool destroy_now);
int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
@ -830,6 +830,8 @@ int unit_exit_status(Unit *u);
int unit_success_action_exit_status(Unit *u);
int unit_failure_action_exit_status(Unit *u);
int unit_test_trigger_loaded(Unit *u);
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \