manager: Fix HW watchdog when systemd starts before driver loaded

When manager_{set|override}_watchdog is called, set the watchdog timeout
regardless of whether the hardware watchdog was successfully initialized.  If
the watchdog was requested but could not be initialized, then instead of
pinging it, attempt to initialize it again.  This ensures that the hardware
watchdog is initialized even if the kernel module for it isn't loaded when
systemd starts (which is quite likely, unless it is compiled in).

This builds on work by @danc86 in https://github.com/systemd/systemd/pull/17460,
but additionally fixes that approach's issue of not updating the watchdog
timeout with the actual value from the hardware.

Fixes https://github.com/systemd/systemd/issues/17838

Co-authored-by: Dan Callaghan <djc@djc.id.au>
Co-authored-by: Michael Marley <michael@michaelmarley.com>
This commit is contained in:
Michael Marley 2020-12-07 21:27:38 -05:00 committed by Luca Boccassi
parent 9e36b885b8
commit 61927b9f11
2 changed files with 36 additions and 9 deletions

View File

@ -2937,8 +2937,10 @@ int manager_loop(Manager *m) {
usec_t wait_usec, watchdog_usec;
watchdog_usec = manager_get_watchdog(m, WATCHDOG_RUNTIME);
if (timestamp_is_set(watchdog_usec))
if (m->runtime_watchdog_running)
(void) watchdog_ping();
else if (timestamp_is_set(watchdog_usec))
manager_retry_runtime_watchdog(m);
if (!ratelimit_below(&rl)) {
/* Yay, something is going seriously wrong, pause a little */
@ -3408,14 +3410,18 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (t == WATCHDOG_RUNTIME)
if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
if (timestamp_is_set(timeout))
if (timestamp_is_set(timeout)) {
r = watchdog_set_timeout(&timeout);
else
if (r >= 0)
m->runtime_watchdog_running = true;
} else {
watchdog_close(true);
m->runtime_watchdog_running = false;
}
}
if (r >= 0)
m->watchdog[t] = timeout;
m->watchdog[t] = timeout;
}
int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
@ -3433,18 +3439,36 @@ int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
usec_t *p;
p = timestamp_is_set(timeout) ? &timeout : &m->watchdog[t];
if (timestamp_is_set(*p))
if (timestamp_is_set(*p)) {
r = watchdog_set_timeout(p);
else
if (r >= 0)
m->runtime_watchdog_running = true;
} else {
watchdog_close(true);
m->runtime_watchdog_running = false;
}
}
if (r >= 0)
m->watchdog_overridden[t] = timeout;
m->watchdog_overridden[t] = timeout;
return 0;
}
/* Retry setting the runtime hardware watchdog timeout.
 *
 * Called from the main loop when a runtime watchdog timeout is configured but
 * the watchdog is not yet marked as running. The overridden timeout is used
 * if one is set, otherwise the regular configured one. The chosen slot is
 * passed by pointer to watchdog_set_timeout(), presumably so that it can be
 * updated with the value the hardware actually accepted (see that function).
 * On success the manager is flagged as having a running runtime watchdog; on
 * failure nothing changes, so a later call can try again. */
void manager_retry_runtime_watchdog(Manager *m) {
        bool overridden;

        assert(m);

        overridden = timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]);

        /* An override, when present, takes precedence over the configured value. */
        if (watchdog_set_timeout(overridden ? &m->watchdog_overridden[WATCHDOG_RUNTIME]
                                            : &m->watchdog[WATCHDOG_RUNTIME]) < 0)
                return;

        m->runtime_watchdog_running = true;
}
static void manager_deserialize_uid_refs_one_internal(
Manager *m,
Hashmap** uid_refs,

View File

@ -241,6 +241,8 @@ struct Manager {
usec_t watchdog[_WATCHDOG_TYPE_MAX];
usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
bool runtime_watchdog_running; /* Whether the runtime HW watchdog was started, so we know if we still need to get the real timeout from the hardware */
dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
/* Data specific to the device subsystem */
@ -562,6 +564,7 @@ ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
usec_t manager_get_watchdog(Manager *m, WatchdogType t);
void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
void manager_retry_runtime_watchdog(Manager *m);
const char* oom_policy_to_string(OOMPolicy i) _const_;
OOMPolicy oom_policy_from_string(const char *s) _pure_;