job: add JobRunningTimeoutSec for JOB_RUNNING state

Unit.JobTimeoutSec starts counting when a job is enqueued in a transaction. The
newly introduced, distinct Unit.JobRunningTimeoutSec starts counting only when
the job starts running (e.g. it covers all Exec* commands of a service, or the
period spent waiting for a device).

Unit.JobRunningTimeoutSec is intended to be used by default instead of
Unit.JobTimeoutSec for device units, where such behavior causes less confusion
(consider a job for a _netdev mount device: with this change the timeout will
start ticking only after the network is ready).
This commit is contained in:
Michal Koutný 2017-02-17 17:47:20 +01:00
parent 9e49656037
commit a2df3ea4ae
8 changed files with 51 additions and 17 deletions

View File

@ -718,17 +718,20 @@
<varlistentry>
<term><varname>JobTimeoutSec=</varname></term>
<term><varname>JobRunningTimeoutSec=</varname></term>
<term><varname>JobTimeoutAction=</varname></term>
<term><varname>JobTimeoutRebootArgument=</varname></term>
<listitem><para>When a job for this unit is queued, a time-out may be configured. If this time limit is
reached, the job will be cancelled, the unit however will not change state or even enter the
<literal>failed</literal> mode. This value defaults to <literal>infinity</literal> (job timeouts disabled),
except for device units. NB: this timeout is independent from any unit-specific timeout (for example, the
timeout set with <varname>TimeoutStartSec=</varname> in service units) as the job timeout has no effect on the
unit itself, only on the job that might be pending for it. Or in other words: unit-specific timeouts are useful
to abort unit state changes, and revert them. The job timeout set with this option however is useful to abort
only the job waiting for the unit state to change.</para>
<listitem><para><varname>JobTimeoutSec=</varname> configures a time-out that starts counting when a job for this
unit is queued. Similarly, <varname>JobRunningTimeoutSec=</varname> configures a time-out that starts counting
only when the queued job is actually started. If either time limit is reached, the job will be cancelled; the
unit, however, will not change state or even enter the <literal>failed</literal> mode. Both values default to
<literal>infinity</literal> (job timeouts disabled), except for device units, where
<varname>JobRunningTimeoutSec=</varname> defaults to <varname>DefaultTimeoutStartSec=</varname>. NB: these
timeouts are independent from any unit-specific timeout (for example, the timeout set with
<varname>TimeoutStartSec=</varname> in service units), as a job timeout has no effect on the unit itself, only
on the job that might be pending for it. Or in other words: unit-specific timeouts are useful to abort unit
state changes, and revert them. The job timeouts set with these options, however, are useful to abort only the
job waiting for the unit state to change.</para>
<para><varname>JobTimeoutAction=</varname>
optionally configures an additional

View File

@ -748,6 +748,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobRunningTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_running_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),

View File

@ -576,6 +576,7 @@ int job_run_and_invalidate(Job *j) {
if (!job_is_runnable(j))
return -EAGAIN;
job_start_timer(j, true);
job_set_state(j, JOB_RUNNING);
job_add_to_dbus_queue(j);
@ -949,22 +950,45 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
return 0;
}
int job_start_timer(Job *j) {
int job_start_timer(Job *j, bool job_running) {
int r;
usec_t run_begin, timeout_time, old_timeout_time;
if (j->timer_event_source)
return 0;
if (job_running) {
if (j->unit->job_running_timeout == USEC_INFINITY)
return 0;
j->begin_usec = now(CLOCK_MONOTONIC);
run_begin = now(CLOCK_MONOTONIC);
timeout_time = usec_add(run_begin, j->unit->job_running_timeout);
if (j->unit->job_timeout == USEC_INFINITY)
return 0;
if (j->timer_event_source) {
/* Update only if JobRunningTimeoutSec= results in earlier timeout */
r = sd_event_source_get_time(j->timer_event_source, &old_timeout_time);
if (r < 0)
return r;
if (old_timeout_time <= timeout_time)
return 0;
return sd_event_source_set_time(j->timer_event_source, timeout_time);
}
} else {
if (j->timer_event_source)
return 0;
j->begin_usec = now(CLOCK_MONOTONIC);
if (j->unit->job_timeout == USEC_INFINITY)
return 0;
timeout_time = usec_add(j->begin_usec, j->unit->job_timeout);
}
r = sd_event_add_time(
j->manager->event,
&j->timer_event_source,
CLOCK_MONOTONIC,
usec_add(j->begin_usec, j->unit->job_timeout), 0,
timeout_time, 0,
job_dispatch_timer, j);
if (r < 0)
return r;

View File

@ -220,7 +220,7 @@ int job_type_merge_and_collapse(JobType *a, JobType b, Unit *u);
void job_add_to_run_queue(Job *j);
void job_add_to_dbus_queue(Job *j);
int job_start_timer(Job *j);
int job_start_timer(Job *j, bool job_running);
int job_run_and_invalidate(Job *j);
int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already);

View File

@ -194,6 +194,7 @@ Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0,
Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate)
Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0
Unit.JobTimeoutSec, config_parse_sec_fix_0, 0, offsetof(Unit, job_timeout)
Unit.JobRunningTimeoutSec, config_parse_sec, 0, offsetof(Unit, job_running_timeout)
Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action)
Unit.JobTimeoutRebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, job_timeout_reboot_arg)
Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval)

View File

@ -632,7 +632,7 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) {
job_add_to_run_queue(j);
job_add_to_dbus_queue(j);
job_start_timer(j);
job_start_timer(j, false);
job_shutdown_magic(j);
}

View File

@ -99,6 +99,7 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
u->job_running_timeout = USEC_INFINITY;
u->ref_uid = UID_INVALID;
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
@ -1336,6 +1337,9 @@ int unit_load(Unit *u) {
goto fail;
}
if (u->job_running_timeout != USEC_INFINITY && u->job_running_timeout > u->job_timeout)
log_unit_warning(u, "JobRunningTimeoutSec= is greater than JobTimeoutSec=, it has no effect.");
unit_update_cgroup_members_masks(u);
}

View File

@ -114,6 +114,7 @@ struct Unit {
/* Job timeout and action to take */
usec_t job_timeout;
usec_t job_running_timeout;
EmergencyAction job_timeout_action;
char *job_timeout_reboot_arg;