From e602f15282bcb413857e5ed06b0b02985cea1cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 6 Jun 2019 23:27:20 +0200 Subject: [PATCH 1/4] core: Extract job ordering logic The job ordering logic is spread at multiple places of the code, making it hard to maintain and also a bit to understand. The actual execution order of two jobs always depends on their types and the ordering contraint between their units. Extract this logic to a new function job_compare. The second change is simplification of the order evaluation, JOB_STOP takes always precedence (as documented), unless two units are both stopping, then the ordering constraint is taken into account. --- src/core/job.c | 163 +++++++++++++++++++++++-------------------------- src/core/job.h | 2 + 2 files changed, 77 insertions(+), 88 deletions(-) diff --git a/src/core/job.c b/src/core/job.c index 81f5f9cb72..780f92c4d2 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -477,35 +477,13 @@ static bool job_is_runnable(Job *j) { if (j->type == JOB_NOP) return true; - if (IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) { - /* Immediate result is that the job is or might be - * started. In this case let's wait for the - * dependencies, regardless whether they are - * starting or stopping something. */ - - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) - if (other->job) - return false; - } - - /* Also, if something else is being stopped and we should - * change state after it, then let's wait. */ - - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) - if (other->job && - IN_SET(other->job->type, JOB_STOP, JOB_RESTART)) + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) + if (other->job && job_compare(j, other->job, UNIT_AFTER) > 0) return false; - /* This means that for a service a and a service b where b - * shall be started after a: - * - * start a + start b → 1st step start a, 2nd step start b - * start a + stop b → 1st step stop b, 2nd step start a - * stop a + start b → 1st step stop a, 2nd step start b - * stop a + stop b → 1st step stop b, 2nd step stop a - * - * This has the side effect that restarts are properly - * synchronized too. */ + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) + if (other->job && job_compare(j, other->job, UNIT_BEFORE) > 0) + return false; return true; } @@ -1455,46 +1433,14 @@ bool job_may_gc(Job *j) { if (j->type == JOB_NOP) return false; - /* If a job is ordered after ours, and is to be started, then it needs to wait for us, regardless if we stop or - * start, hence let's not GC in that case. */ - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) { - if (!other->job) - continue; - - if (other->job->ignore_order) - continue; - - if (IN_SET(other->job->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) + /* The logic is inverse to job_is_runnable, we cannot GC as long as we block any job. */ + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) + if (other->job && job_compare(j, other->job, UNIT_BEFORE) < 0) return false; - } - - /* If we are going down, but something else is ordered After= us, then it needs to wait for us */ - if (IN_SET(j->type, JOB_STOP, JOB_RESTART)) - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) { - if (!other->job) - continue; - - if (other->job->ignore_order) - continue; + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) + if (other->job && job_compare(j, other->job, UNIT_AFTER) < 0) return false; - } - - /* The logic above is kinda the inverse of the job_is_runnable() logic. Specifically, if the job "we" is - * ordered before the job "other": - * - * we start + other start → stay - * we start + other stop → gc - * we stop + other start → stay - * we stop + other stop → gc - * - * "we" are ordered after "other": - * - * we start + other start → gc - * we start + other stop → gc - * we stop + other start → stay - * we stop + other stop → stay - */ return true; } @@ -1512,7 +1458,7 @@ void job_add_to_gc_queue(Job *j) { j->in_gc_queue = true; } -static int job_compare(Job * const *a, Job * const *b) { +static int job_compare_id(Job * const *a, Job * const *b) { return CMP((*a)->id, (*b)->id); } @@ -1521,7 +1467,7 @@ static size_t sort_job_list(Job **list, size_t n) { size_t a, b; /* Order by numeric IDs */ - typesafe_qsort(list, n, job_compare); + typesafe_qsort(list, n, job_compare_id); /* Filter out duplicates */ for (a = 0, b = 0; a < n; a++) { @@ -1552,23 +1498,21 @@ int job_get_before(Job *j, Job*** ret) { return 0; } - if (IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) { + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) { + if (!other->job) + continue; + if (job_compare(j, other->job, UNIT_AFTER) <= 0) + continue; - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) { - if (!other->job) - continue; - - if (!GREEDY_REALLOC(list, n_allocated, n+1)) - return -ENOMEM; - list[n++] = other->job; - } + if (!GREEDY_REALLOC(list, n_allocated, n+1)) + return -ENOMEM; + list[n++] = other->job; } HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) { if (!other->job) continue; - - if (!IN_SET(other->job->type, JOB_STOP, JOB_RESTART)) + if (job_compare(j, other->job, UNIT_BEFORE) <= 0) continue; if (!GREEDY_REALLOC(list, n_allocated, n+1)) @@ -1602,7 +1546,7 @@ int job_get_after(Job *j, Job*** ret) { if (other->job->ignore_order) continue; - if (!IN_SET(other->job->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) + if (job_compare(j, other->job, UNIT_BEFORE) >= 0) continue; if (!GREEDY_REALLOC(list, n_allocated, n+1)) @@ -1610,19 +1554,20 @@ int job_get_after(Job *j, Job*** ret) { list[n++] = other->job; } - if (IN_SET(j->type, JOB_STOP, JOB_RESTART)) { - HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) { - if (!other->job) - continue; + HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) { + if (!other->job) + continue; - if (other->job->ignore_order) - continue; + if (other->job->ignore_order) + continue; - if (!GREEDY_REALLOC(list, n_allocated, n+1)) - return -ENOMEM; - list[n++] = other->job; - } + if (job_compare(j, other->job, UNIT_AFTER) >= 0) + continue; + + if (!GREEDY_REALLOC(list, n_allocated, n+1)) + return -ENOMEM; + list[n++] = other->job; } n = sort_job_list(list, n); @@ -1692,3 +1637,45 @@ const char* job_type_to_access_method(JobType t) { else return "reload"; } + +/* + * assume_dep assumed dependency between units (a is before/after b) + * + * Returns + * 0 jobs are independent, + * >0 a should run after b, + * <0 a should run before b, + * + * The logic means that for a service a and a service b where b.After=a: + * + * start a + start b → 1st step start a, 2nd step start b + * start a + stop b → 1st step stop b, 2nd step start a + * stop a + start b → 1st step stop a, 2nd step start b + * stop a + stop b → 1st step stop b, 2nd step stop a + * + * This has the side effect that restarts are properly + * synchronized too. + */ +int job_compare(Job *a, Job *b, UnitDependency assume_dep) { + assert(a->type < _JOB_TYPE_MAX_IN_TRANSACTION); + assert(b->type < _JOB_TYPE_MAX_IN_TRANSACTION); + assert(IN_SET(assume_dep, UNIT_AFTER, UNIT_BEFORE)); + + /* Trivial cases first */ + if (a->type == JOB_NOP || b->type == JOB_NOP) + return 0; + + if (a->ignore_order || b->ignore_order) + return 0; + + if (assume_dep == UNIT_AFTER) + return -job_compare(b, a, UNIT_BEFORE); + + /* Let's make it simple, JOB_STOP goes always first (in case both ua and ub stop, + * then ub's stop goes first anyway). + * JOB_RESTART is JOB_STOP in disguise (before it is patched to JOB_START). */ + if (IN_SET(b->type, JOB_STOP, JOB_RESTART)) + return 1; + else + return -1; +} diff --git a/src/core/job.h b/src/core/job.h index 0f15cbf821..9c79c873d1 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -237,3 +237,5 @@ const char* job_result_to_string(JobResult t) _const_; JobResult job_result_from_string(const char *s) _pure_; const char* job_type_to_access_method(JobType t); + +int job_compare(Job *a, Job *b, UnitDependency assume_dep); From dfd79eca55e3d4bd61813ca4cf8887544d952c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 6 Jun 2019 23:27:20 +0200 Subject: [PATCH 2/4] core: Check transaction against execution cycles When we are validating a transaction, we take into account declared ordering between job units. However, since JOB_STOP goes always first regardless of the ordering constraint between respective units, we may detect some false cycles in the transaction which would not prevent the execution though. Use the same logic in transaction checking as we use for job execution. --- src/core/transaction.c | 45 +++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/src/core/transaction.c b/src/core/transaction.c index 3b6b240d36..d1bf2e64c9 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -353,6 +353,11 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi Unit *u; void *v; int r; + static const UnitDependency directions[] = { + UNIT_BEFORE, + UNIT_AFTER, + }; + size_t d; assert(tr); assert(j); @@ -441,25 +446,33 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi j->marker = from ? from : j; j->generation = generation; - /* We assume that the dependencies are bidirectional, and - * hence can ignore UNIT_AFTER */ - HASHMAP_FOREACH_KEY(v, u, j->unit->dependencies[UNIT_BEFORE], i) { - Job *o; + /* Actual ordering of jobs depends on the unit ordering dependency and job types. We need to traverse + * the graph over 'before' edges in the actual job execution order. We traverse over both unit + * ordering dependencies and we test with job_compare() whether it is the 'before' edge in the job + * execution ordering. */ + for (d = 0; d < ELEMENTSOF(directions); d++) { + HASHMAP_FOREACH_KEY(v, u, j->unit->dependencies[directions[d]], i) { + Job *o; - /* Is there a job for this unit? */ - o = hashmap_get(tr->jobs, u); - if (!o) { - /* Ok, there is no job for this in the - * transaction, but maybe there is already one - * running? */ - o = u->job; - if (!o) + /* Is there a job for this unit? */ + o = hashmap_get(tr->jobs, u); + if (!o) { + /* Ok, there is no job for this in the + * transaction, but maybe there is already one + * running? */ + o = u->job; + if (!o) + continue; + } + + /* Cut traversing if the job j is not really *before* o. */ + if (job_compare(j, o, directions[d]) >= 0) continue; - } - r = transaction_verify_order_one(tr, o, j, generation, e); - if (r < 0) - return r; + r = transaction_verify_order_one(tr, o, j, generation, e); + if (r < 0) + return r; + } } /* Ok, let's backtrack, and remember that this entry is not on From 804cdabc31b38b840eec2b64dc8bdd9a8e660a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 6 Jun 2019 23:27:20 +0200 Subject: [PATCH 3/4] tests: Check job ordering on execution cycles The test-engine Test2 tests the cycle detection when units a, b and d all start at once ,-------------------after-----------------, v | a/start ---after---> d/start ---after---> b/start Extend the test with Test11 that adds i.service which causes a and d stop (by unordered Conflicts=) while starting b. Because stops precede starts, we effectively eliminate the job cycle and all transaction jobs should be applicable. ,-------------------after-----------------, v | a/stop <---after--- d/stop <---after--- b/start . . ^ . . | '. . . . . . . . . i/start ---after------' --- src/test/test-engine.c | 18 +++++++++++++++++- test/i.service | 8 ++++++++ test/meson.build | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 test/i.service diff --git a/src/test/test-engine.c b/src/test/test-engine.c index 701594fe53..3624a274e2 100644 --- a/src/test/test-engine.c +++ b/src/test/test-engine.c @@ -9,12 +9,14 @@ #include "rm-rf.h" #include "test-helper.h" #include "tests.h" +#include "service.h" int main(int argc, char *argv[]) { _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL; _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL; _cleanup_(manager_freep) Manager *m = NULL; - Unit *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *g = NULL, *h = NULL, *unit_with_multiple_dashes = NULL; + Unit *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *g = NULL, + *h = NULL, *i = NULL, *unit_with_multiple_dashes = NULL; Job *j; int r; @@ -94,6 +96,20 @@ int main(int argc, char *argv[]) { assert_se(manager_add_job(m, JOB_START, h, JOB_FAIL, NULL, NULL, &j) == 0); manager_dump_jobs(m, stdout, "\t"); + printf("Load5:\n"); + manager_clear_jobs(m); + assert_se(manager_load_startable_unit_or_warn(m, "i.service", NULL, &i) >= 0); + SERVICE(a)->state = SERVICE_RUNNING; + SERVICE(d)->state = SERVICE_RUNNING; + manager_dump_units(m, stdout, "\t"); + + printf("Test11: (Start/stop job ordering, execution cycle)\n"); + assert_se(manager_add_job(m, JOB_START, i, JOB_FAIL, NULL, NULL, &j) == 0); + assert_se(a->job && a->job->type == JOB_STOP); + assert_se(d->job && d->job->type == JOB_STOP); + assert_se(b->job && b->job->type == JOB_START); + manager_dump_jobs(m, stdout, "\t"); + assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b)); assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a)); assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c)); diff --git a/test/i.service b/test/i.service new file mode 100644 index 0000000000..938ea77bdf --- /dev/null +++ b/test/i.service @@ -0,0 +1,8 @@ +[Unit] +Description=I +Conflicts=a.service d.service +Wants=b.service +After=b.service + +[Service] +ExecStart=/bin/true diff --git a/test/meson.build b/test/meson.build index 17d0f3cddd..a243538081 100644 --- a/test/meson.build +++ b/test/meson.build @@ -26,6 +26,7 @@ test_data_files = ''' hello-after-sleep.target hello.service hwdb/10-bad.hwdb + i.service journal-data/journal-1.txt journal-data/journal-2.txt nomem.slice From 594057fd996ad8401b815ed974f1236c4edc3e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 6 Jun 2019 23:27:20 +0200 Subject: [PATCH 4/4] tests: Check trivial loop between two jobs job_compare return value is undefined in case the jobs have a loop between them, so better make a test to make sure transaction cycle detection catches it. --- src/test/test-engine.c | 12 +++++++++++- test/a-conj.service | 8 ++++++++ test/meson.build | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/a-conj.service diff --git a/src/test/test-engine.c b/src/test/test-engine.c index 3624a274e2..9809d408f6 100644 --- a/src/test/test-engine.c +++ b/src/test/test-engine.c @@ -16,7 +16,7 @@ int main(int argc, char *argv[]) { _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL; _cleanup_(manager_freep) Manager *m = NULL; Unit *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *g = NULL, - *h = NULL, *i = NULL, *unit_with_multiple_dashes = NULL; + *h = NULL, *i = NULL, *a_conj = NULL, *unit_with_multiple_dashes = NULL; Job *j; int r; @@ -110,6 +110,16 @@ int main(int argc, char *argv[]) { assert_se(b->job && b->job->type == JOB_START); manager_dump_jobs(m, stdout, "\t"); + printf("Load6:\n"); + manager_clear_jobs(m); + assert_se(manager_load_startable_unit_or_warn(m, "a-conj.service", NULL, &a_conj) >= 0); + SERVICE(a)->state = SERVICE_DEAD; + manager_dump_units(m, stdout, "\t"); + + printf("Test12: (Trivial cycle, Unfixable)\n"); + assert_se(manager_add_job(m, JOB_START, a_conj, JOB_REPLACE, NULL, NULL, &j) == -EDEADLK); + manager_dump_jobs(m, stdout, "\t"); + assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b)); assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a)); assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c)); diff --git a/test/a-conj.service b/test/a-conj.service new file mode 100644 index 0000000000..db37ae71d4 --- /dev/null +++ b/test/a-conj.service @@ -0,0 +1,8 @@ +[Unit] +Description=A conjugate +Requires=a.service +After=a.service +Before=a.service + +[Service] +ExecStart=/bin/true diff --git a/test/meson.build b/test/meson.build index a243538081..8c71e72667 100644 --- a/test/meson.build +++ b/test/meson.build @@ -2,6 +2,7 @@ test_data_files = ''' a.service + a-conj.service b.service basic.target c.service