From 9fb1cdb4808a65d1c96a99c1b0e53f54ba1b0d3b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Oct 2018 18:59:03 +0200 Subject: [PATCH 1/5] service: explicit stop the watchdog when we shall not use it This is useful so that WATCHDOG_USEC=0 sent from a process does the right thing if turning off the watchdog logic. --- src/core/service.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/service.c b/src/core/service.c index 510d8d6a3a..93f0665bcf 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -195,19 +195,21 @@ static usec_t service_get_watchdog_usec(Service *s) { if (s->watchdog_override_enable) return s->watchdog_override_usec; - else - return s->watchdog_usec; + + return s->watchdog_original_usec; } static void service_start_watchdog(Service *s) { - int r; usec_t watchdog_usec; + int r; assert(s); watchdog_usec = service_get_watchdog_usec(s); - if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) + if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) { + service_stop_watchdog(s); return; + } if (s->watchdog_event_source) { r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec)); From ec35a7f6b0d92fc30561dbcf9e4a6ee54dd4fba0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Oct 2018 19:02:13 +0200 Subject: [PATCH 2/5] service: rework service_extend_timeout() Let's unify common code: let's extend the watchdog timeout and the regular timeout with the same helper function. --- src/core/service.c | 71 +++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/src/core/service.c b/src/core/service.c index 93f0665bcf..15ceee7baf 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -237,48 +237,53 @@ static void service_start_watchdog(Service *s) { * of living before we consider a service died. */ r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE); } - if (r < 0) log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m"); } -static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) { +static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) { + usec_t current; + int r; + assert(s); - if (s->timer_event_source) { - uint64_t current = 0, extended = 0; - int r; + /* Extends the specified event source timer to at least the specified time, unless it is already later + * anyway. */ - if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY)) - return; + if (!source) + return; - extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec); - - r = sd_event_source_get_time(s->timer_event_source, ¤t); - if (r < 0) - log_unit_error_errno(UNIT(s), r, "Failed to retrieve timeout timer: %m"); - else if (extended > current) { - r = sd_event_source_set_time(s->timer_event_source, extended); - if (r < 0) - log_unit_warning_errno(UNIT(s), r, "Failed to set timeout timer: %m"); - } - - if (s->watchdog_event_source) { - /* extend watchdog if necessary. We've asked for an extended timeout so we - * shouldn't expect a watchdog timeout in the interval in between */ - r = sd_event_source_get_time(s->watchdog_event_source, ¤t); - if (r < 0) { - log_unit_error_errno(UNIT(s), r, "Failed to retrieve watchdog timer: %m"); - return; - } - - if (extended > current) { - r = sd_event_source_set_time(s->watchdog_event_source, extended); - if (r < 0) - log_unit_warning_errno(UNIT(s), r, "Failed to set watchdog timer: %m"); - } - } + r = sd_event_source_get_time(source, ¤t); + if (r < 0) { + const char *desc; + (void) sd_event_source_get_description(s->timer_event_source, &desc); + log_unit_warning_errno(UNIT(s), r, "Failed to retrieve timeout time for event source '%s', ignoring: %m", strna(desc)); + return; } + + if (current >= extended) /* Current timeout is already longer, ignore this. */ + return; + + r = sd_event_source_set_time(source, extended); + if (r < 0) { + const char *desc; + (void) sd_event_source_get_description(s->timer_event_source, &desc); + log_unit_warning_errno(UNIT(s), r, "Failed to set timeout time for even source '%s', ignoring %m", strna(desc)); + } +} + +static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) { + usec_t extended; + + assert(s); + + if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY)) + return; + + extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec); + + service_extend_event_source_timeout(s, s->timer_event_source, extended); + service_extend_event_source_timeout(s, s->watchdog_event_source, extended); } static void service_reset_watchdog(Service *s) { From 95d0d8ed0abdbdf5d66e663f4a2d6327cd074ab1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Oct 2018 19:03:04 +0200 Subject: [PATCH 3/5] =?UTF-8?q?service:=20rename=20service=5Freset=5Fwatch?= =?UTF-8?q?dog=5Ftimeout()=20=E2=86=92=20service=5Foverride=5Fwatchdog=5Ft?= =?UTF-8?q?imeout()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is what the function really does, hence name it that way. --- src/core/service.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/service.c b/src/core/service.c index 15ceee7baf..cd33e218b6 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -293,7 +293,7 @@ static void service_reset_watchdog(Service *s) { service_start_watchdog(s); } -static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) { +static void service_override_watchdog_timeout(Service *s, usec_t watchdog_override_usec) { assert(s); s->watchdog_override_enable = true; @@ -3774,7 +3774,7 @@ static void service_notify_message( if (safe_atou64(e, &watchdog_override_usec) < 0) log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e); else - service_reset_watchdog_timeout(s, watchdog_override_usec); + service_override_watchdog_timeout(s, watchdog_override_usec); } /* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. In both cases, From 34b3f625f23f1185d673128254540e8c83be4117 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Oct 2018 19:03:41 +0200 Subject: [PATCH 4/5] service: continue to use the overriden timeout when forking off again Let's make sure we always use the right watchdog timeout: when a service has overwritten it, then stick to it, also for follow-up processes of the same service. --- src/core/service.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/service.c b/src/core/service.c index cd33e218b6..2e8ef7d1ab 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1539,7 +1539,7 @@ static int service_spawn( exec_params.fd_names = fd_names; exec_params.n_socket_fds = n_socket_fds; exec_params.n_storage_fds = n_storage_fds; - exec_params.watchdog_usec = s->watchdog_usec; + exec_params.watchdog_usec = service_get_watchdog_usec(s); exec_params.selinux_context_net = s->socket_fd_selinux_context_net; if (s->type == SERVICE_IDLE) exec_params.idle_pipe = UNIT(s)->manager->idle_pipe; From aa8c4bbf6a9d20576bd65f6b27b769c08be33e73 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Oct 2018 19:04:41 +0200 Subject: [PATCH 5/5] service: when starting a service make a copy of the watchdog timeout and use that When we start a service process we pass the selected watchdog timeout to it with the $WATCHDOG_USEC environment variable. If the unit file is reconfigured later, we need to make sure to continue to honour the original timeout, i.e. watch $WATCHDOG_USEC was set to, otherwise we'll expect the ping at a different time as the service process is sending it to us. Hence, whenever we start a unit, save the watchdog timeout, and stick to that for everything we do. Fixes: #9467 --- src/core/service.c | 12 +++++++++++- src/core/service.h | 5 +++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/core/service.c b/src/core/service.c index 2e8ef7d1ab..2a36cc03f3 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -107,6 +107,8 @@ static void service_init(Unit *u) { s->exec_context.keyring_mode = MANAGER_IS_SYSTEM(u->manager) ? EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT; + + s->watchdog_original_usec = USEC_INFINITY; } static void service_unwatch_control_pid(Service *s) { @@ -2352,8 +2354,9 @@ static int service_start(Unit *u) { s->notify_state = NOTIFY_UNKNOWN; + s->watchdog_original_usec = s->watchdog_usec; s->watchdog_override_enable = false; - s->watchdog_override_usec = 0; + s->watchdog_override_usec = USEC_INFINITY; exec_command_reset_status_list_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX); exec_status_reset(&s->main_exec_status); @@ -2588,6 +2591,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { if (s->watchdog_override_enable) (void) serialize_item_format(f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec); + if (s->watchdog_original_usec != USEC_INFINITY) + (void) serialize_item_format(f, "watchdog-original-usec", USEC_FMT, s->watchdog_original_usec); + return 0; } @@ -2896,6 +2902,10 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, else s->watchdog_override_enable = true; + } else if (streq(key, "watchdog-original-usec")) { + if (deserialize_usec(value, &s->watchdog_original_usec) < 0) + log_unit_debug(u, "Failed to parse watchdog_original_usec value: %s", value); + } else if (STR_IN_SET(key, "main-command", "control-command")) { r = service_deserialize_exec_command(u, key, value); if (r < 0) diff --git a/src/core/service.h b/src/core/service.h index 1206e3cdda..9c4340c70e 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -99,8 +99,9 @@ struct Service { usec_t runtime_max_usec; dual_timestamp watchdog_timestamp; - usec_t watchdog_usec; - usec_t watchdog_override_usec; + usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */ + usec_t watchdog_original_usec; /* the watchdog timeout that was in effect when the unit was started, i.e. the timeout the forked off processes currently see */ + usec_t watchdog_override_usec; /* the watchdog timeout requested by the service itself through sd_notify() */ bool watchdog_override_enable; sd_event_source *watchdog_event_source;