From 1d9cc8768f173b25757c01aa0d4c7be7cd7116bc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 6 Feb 2018 11:57:35 +0100 Subject: [PATCH 01/21] cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only Currently we allowed delegation for alluntis with cgroup backing except for slices. Let's make this a bit more strict for now, and only allow this in service and scope units. Let's also add a generic accessor unit_cgroup_delegate() for checking whether a unit has delegation turned on that checks the new bool first. Also, when doing transient units, let's explcitly refuse turning on delegation for unit types that don#t support it. This is mostly cosmetical as we wouldn't act on the delegation request anyway, but certainly helpful for debugging. --- src/core/bpf-firewall.c | 4 ++-- src/core/cgroup.c | 27 ++++++++++++++++++--------- src/core/cgroup.h | 2 ++ src/core/dbus-cgroup.c | 6 ++++++ src/core/scope.c | 1 + src/core/service.c | 4 +++- src/core/unit.c | 23 ++++++++--------------- src/core/unit.h | 3 +++ 8 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index f3f40fb0e8..e11550f11c 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -569,7 +569,7 @@ int bpf_firewall_install(Unit *u) { if (r < 0) return log_error_errno(r, "Kernel upload of egress BPF program failed: %m"); - r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0); + r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0); if (r < 0) return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path); } else { @@ -584,7 +584,7 @@ int bpf_firewall_install(Unit *u) { if (r < 0) return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m"); - r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0); + r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0); if (r < 0) return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path); } else { diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 97b3756567..d05e338d11 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1124,14 +1124,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { * * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */ - if (u->type == UNIT_SLICE) - return 0; - - c = unit_get_cgroup_context(u); - if (!c) - return 0; - - if (!c->delegate) + if (!unit_cgroup_delegate(u)) return 0; if (cg_all_unified() <= 0) { @@ -1142,6 +1135,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { return 0; } + assert_se(c = unit_get_cgroup_context(u)); return c->delegate_controllers; } @@ -1496,7 +1490,7 @@ static int unit_create_cgroup( u->cgroup_enabled_mask = enable_mask; u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF; - if (u->type != UNIT_SLICE && !c->delegate) { + if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) { /* Then, possibly move things over, but not if * subgroups may contain processes, which is the case @@ -2563,6 +2557,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) { } } +bool unit_cgroup_delegate(Unit *u) { + CGroupContext *c; + + assert(u); + + if (!UNIT_VTABLE(u)->can_delegate) + return false; + + c = unit_get_cgroup_context(u); + if (!c) + return false; + + return c->delegate; +} + void manager_invalidate_startup_units(Manager *m) { Iterator i; Unit *u; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 1f50441412..5113313405 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -219,3 +219,5 @@ void manager_invalidate_startup_units(Manager *m); const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_; CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; + +bool unit_cgroup_delegate(Unit *u); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index f8d90d4b3a..30456fa9f5 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -351,6 +351,9 @@ static int bus_cgroup_set_transient_property( if (streq(name, "Delegate")) { int b; + if (!UNIT_VTABLE(u)->can_delegate) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type"); + r = sd_bus_message_read(message, "b", &b); if (r < 0) return r; @@ -367,6 +370,9 @@ static int bus_cgroup_set_transient_property( } else if (streq(name, "DelegateControllers")) { CGroupMask mask = 0; + if (!UNIT_VTABLE(u)->can_delegate) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type"); + r = sd_bus_message_enter_container(message, 'a', "s"); if (r < 0) return r; diff --git a/src/core/scope.c b/src/core/scope.c index 2831e057a1..47c47bc708 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -599,6 +599,7 @@ const UnitVTable scope_vtable = { .private_section = "Scope", .can_transient = true, + .can_delegate = true, .init = scope_init, .load = scope_load, diff --git a/src/core/service.c b/src/core/service.c index 37904874b5..3c1dbdfe0f 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -3909,6 +3909,9 @@ const UnitVTable service_vtable = { "Install\0", .private_section = "Service", + .can_transient = true, + .can_delegate = true, + .init = service_init, .done = service_done, .load = service_load, @@ -3954,7 +3957,6 @@ const UnitVTable service_vtable = { .get_timeout = service_get_timeout, .needs_console = service_needs_console, - .can_transient = true, .status_message_formats = { .starting_stopping = { diff --git a/src/core/unit.c b/src/core/unit.c index 9a57bcfb4b..68f29137b8 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4541,22 +4541,15 @@ int unit_kill_context( } else if (r > 0) { - /* FIXME: For now, on the legacy hierarchy, we - * will not wait for the cgroup members to die - * if we are running in a container or if this - * is a delegation unit, simply because cgroup - * notification is unreliable in these - * cases. It doesn't work at all in - * containers, and outside of containers it - * can be confused easily by left-over - * directories in the cgroup — which however - * should not exist in non-delegated units. On - * the unified hierarchy that's different, - * there we get proper events. Hence rely on - * them. */ + /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if + * we are running in a container or if this is a delegation unit, simply because cgroup + * notification is unreliable in these cases. It doesn't work at all in containers, and outside + * of containers it can be confused easily by left-over directories in the cgroup — which + * however should not exist in non-delegated units. On the unified hierarchy that's different, + * there we get proper events. Hence rely on them. */ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 || - (detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate))) + (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; if (send_sighup) { @@ -5007,7 +5000,7 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) { assert(p); p->cgroup_path = u->cgroup_path; - SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate)); + SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u)); } int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) { diff --git a/src/core/unit.h b/src/core/unit.h index 3210583050..6d49c7d5f2 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -568,6 +568,9 @@ struct UnitVTable { /* True if transient units of this type are OK */ bool can_transient:1; + /* True if cgroup delegation is permissible */ + bool can_delegate:1; + /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */ bool gc_jobs:1; }; From 004c7f169e8b9b1a6201bcd1dc611ff6efe501f9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 6 Feb 2018 13:09:52 +0100 Subject: [PATCH 02/21] core: fold manager_set_exec_params() into unit_set_exec_params() Let's simplify things a bit: we so far called both functions every single time, let's just merge one into the other, so that we have fewer functions to call. --- src/core/manager.c | 12 ------------ src/core/manager.h | 2 -- src/core/mount.c | 1 - src/core/service.c | 1 - src/core/socket.c | 1 - src/core/swap.c | 1 - src/core/unit.c | 8 ++++++++ 7 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index 5021e00b87..b54c021c75 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3703,18 +3703,6 @@ Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path) { return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p); } -void manager_set_exec_params(Manager *m, ExecParameters *p) { - assert(m); - assert(p); - - p->environment = m->environment; - p->confirm_spawn = manager_get_confirm_spawn(m); - p->cgroup_supported = m->cgroup_supported; - p->prefix = m->prefix; - - SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(m)); -} - int manager_update_failed_units(Manager *m, Unit *u, bool failed) { unsigned size; int r; diff --git a/src/core/manager.h b/src/core/manager.h index c785861bfb..fc70593def 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -435,8 +435,6 @@ void manager_flip_auto_status(Manager *m, bool enable); Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path); -void manager_set_exec_params(Manager *m, ExecParameters *p); - ManagerState manager_state(Manager *m); int manager_update_failed_units(Manager *m, Unit *u, bool failed); diff --git a/src/core/mount.c b/src/core/mount.c index 914458f8e6..cb8dda8557 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -781,7 +781,6 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; - manager_set_exec_params(UNIT(m)->manager, &exec_params); unit_set_exec_params(UNIT(m), &exec_params); r = exec_spawn(UNIT(m), diff --git a/src/core/service.c b/src/core/service.c index 3c1dbdfe0f..ab4626f51b 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1449,7 +1449,6 @@ static int service_spawn( } } - manager_set_exec_params(UNIT(s)->manager, &exec_params); unit_set_exec_params(UNIT(s), &exec_params); final_env = strv_env_merge(2, exec_params.environment, our_env, NULL); diff --git a/src/core/socket.c b/src/core/socket.c index 703f9f760f..38ad8820be 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1910,7 +1910,6 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; - manager_set_exec_params(UNIT(s)->manager, &exec_params); unit_set_exec_params(UNIT(s), &exec_params); exec_params.argv = c->argv; diff --git a/src/core/swap.c b/src/core/swap.c index fffd8d4627..6e5e88b473 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -632,7 +632,6 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { if (r < 0) goto fail; - manager_set_exec_params(UNIT(s)->manager, &exec_params); unit_set_exec_params(UNIT(s), &exec_params); r = exec_spawn(UNIT(s), diff --git a/src/core/unit.c b/src/core/unit.c index 68f29137b8..0ad7dc108b 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4999,6 +4999,14 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) { assert(u); assert(p); + /* Copy parameters from manager */ + p->environment = u->manager->environment; + p->confirm_spawn = manager_get_confirm_spawn(u->manager); + p->cgroup_supported = u->manager->cgroup_supported; + p->prefix = u->manager->prefix; + SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager)); + + /* Copy paramaters from unit */ p->cgroup_path = u->cgroup_path; SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u)); } From 36b5119a0c469df85be0b1807ee866aba5e33287 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 6 Feb 2018 15:59:55 +0100 Subject: [PATCH 03/21] process-util: be more careful in is_kernel_thread() This reworks is_kernel_thread() a bit. Instead of checking whether /proc/$pid/cmdline is entirely empty we now parse the 'flags' field from /proc/$pid/stat and check the PF_KTHREAD flag, which directly encodes whether something is a kernel thread. Why all this? With current kernels userspace processes can set their command line to empty too (through PR_SET_MM_ARG_START and friends), and could potentially confuse us. Hence, let's use a more reliable way to detect kernels like this. --- src/basic/missing.h | 4 +++ src/basic/process-util.c | 64 +++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/basic/missing.h b/src/basic/missing.h index 9d4d08e7a9..f3f2743c03 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -1355,4 +1355,8 @@ struct fib_rule_uid_range { #define NS_GET_NSTYPE _IO(0xb7, 0x3) #endif +#ifndef PF_KTHREAD +#define PF_KTHREAD 0x00200000 +#endif + #include "missing_syscall.h" diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 7f8644ea9f..855ac7534a 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -398,37 +398,61 @@ use_saved_argv: } int is_kernel_thread(pid_t pid) { + _cleanup_free_ char *line = NULL; + unsigned long long flags; + size_t l, i; const char *p; - size_t count; - char c; - bool eof; - FILE *f; + char *q; + int r; if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */ return 0; + if (!pid_is_valid(pid)) + return -EINVAL; - assert(pid > 1); + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; - p = procfs_file_alloca(pid, "cmdline"); - f = fopen(p, "re"); - if (!f) { - if (errno == ENOENT) - return -ESRCH; - return -errno; + /* Skip past the comm field */ + q = strrchr(line, ')'); + if (!q) + return -EINVAL; + q++; + + /* Skip 6 fields to reach the flags field */ + for (i = 0; i < 6; i++) { + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; } - (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + /* Skip preceeding whitespace */ + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; - count = fread(&c, 1, 1, f); - eof = feof(f); - fclose(f); + /* Truncate the rest */ + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q[l] = 0; - /* Kernel threads have an empty cmdline */ + r = safe_atollu(q, &flags); + if (r < 0) + return r; - if (count <= 0) - return eof ? 1 : -errno; - - return 0; + return !!(flags & PF_KTHREAD); } int get_process_capeff(pid_t pid, char **capeff) { From d66a98482b64d89c71baa10eea4b4ed83ec83ca0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 10:39:49 +0100 Subject: [PATCH 04/21] mkosi: update to fedora 27, it's released since a while --- .mkosi/mkosi.fedora | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mkosi/mkosi.fedora b/.mkosi/mkosi.fedora index 4d7168c00b..c7505d19f6 100644 --- a/.mkosi/mkosi.fedora +++ b/.mkosi/mkosi.fedora @@ -22,7 +22,7 @@ [Distribution] Distribution=fedora -Release=26 +Release=27 [Output] Format=raw_btrfs From 418cdd69d15191bac032d27fe3ec2708112cfcf6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 10:39:56 +0100 Subject: [PATCH 05/21] bpf-firewall: fix warning text I figure saying "systemd" here was a typo, and it should have been "system". (Yes, it becomes very hard after a while typing "system" correctly if you type "systemd" so often.) That said, "systemd" in some ways is actually more correct, since BPF might be available for the system instance but not in the user instance. Either way, talking of "this systemd" is weird, let's reword this to be "this manager", to emphasize that it's the local instance of systemd where BPF is not available, but that it might be available otherwise. --- src/core/bpf-firewall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index e11550f11c..c5cda83006 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -494,7 +494,7 @@ int bpf_firewall_compile(Unit *u) { if (r < 0) return r; if (r == 0) { - log_debug("BPF firewalling not supported on this systemd, proceeding without."); + log_debug("BPF firewalling not supported on this manager, proceeding without."); return -EOPNOTSUPP; } @@ -556,7 +556,7 @@ int bpf_firewall_install(Unit *u) { if (r < 0) return r; if (r == 0) { - log_debug("BPF firewalling not supported on this systemd, proceeding without."); + log_debug("BPF firewalling not supported on this manager, proceeding without."); return -EOPNOTSUPP; } From 4502c403995df57c15b5c3d4b55ea58f24e2c0cc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 15:00:32 +0100 Subject: [PATCH 06/21] dbus: split up bus_done() into seperate functions No functional changes, but let's make this a bit more finegrained. (The individual functions are exported, which is used in a later commit) --- src/core/dbus.c | 39 ++++++++++++++++++++++++++++----------- src/core/dbus.h | 4 ++++ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/core/dbus.c b/src/core/dbus.c index 1c3fca353a..c46c299207 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -139,9 +139,10 @@ static int signal_disconnected(sd_bus_message *message, void *userdata, sd_bus_e assert_se(bus = sd_bus_message_get_bus(message)); if (bus == m->api_bus) - destroy_bus(m, &m->api_bus); + bus_done_api(m); if (bus == m->system_bus) - destroy_bus(m, &m->system_bus); + bus_done_system(m); + if (set_remove(m->private_buses, bus)) { log_debug("Got disconnect on private connection."); destroy_bus(m, &bus); @@ -1077,28 +1078,44 @@ static void destroy_bus(Manager *m, sd_bus **bus) { *bus = sd_bus_unref(*bus); } -void bus_done(Manager *m) { - sd_bus *b; - +void bus_done_api(Manager *m) { assert(m); if (m->api_bus) destroy_bus(m, &m->api_bus); +} + +void bus_done_system(Manager *m) { + assert(m); + if (m->system_bus) destroy_bus(m, &m->system_bus); +} + +void bus_done_private(Manager *m) { + sd_bus *b; + + assert(m); + while ((b = set_steal_first(m->private_buses))) destroy_bus(m, &b); m->private_buses = set_free(m->private_buses); - m->subscribed = sd_bus_track_unref(m->subscribed); - m->deserialized_subscribed = strv_free(m->deserialized_subscribed); - - if (m->private_listen_event_source) - m->private_listen_event_source = sd_event_source_unref(m->private_listen_event_source); - + m->private_listen_event_source = sd_event_source_unref(m->private_listen_event_source); m->private_listen_fd = safe_close(m->private_listen_fd); +} +void bus_done(Manager *m) { + assert(m); + + bus_done_api(m); + bus_done_system(m); + bus_done_private(m); + + assert(!m->subscribed); + + m->deserialized_subscribed = strv_free(m->deserialized_subscribed); bus_verify_polkit_async_registry_free(m->polkit_registry); } diff --git a/src/core/dbus.h b/src/core/dbus.h index fe50fdd5f7..17dfbc9f97 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -25,6 +25,10 @@ int bus_send_queued_message(Manager *m); int bus_init(Manager *m, bool try_bus_connect); + +void bus_done_private(Manager *m); +void bus_done_api(Manager *m); +void bus_done_system(Manager *m); void bus_done(Manager *m); int bus_fdset_add_all(Manager *m, FDSet *fds); From 8559b3b75cbd04ec132f55bdb61eae7faf27d6fc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 14:52:22 +0100 Subject: [PATCH 07/21] core: rework how we connect to the bus This removes the current bus_init() call, as it had multiple problems: it munged handling of the three bus connections we care about (private, "api" and system) into one, even though the conditions when which was ready are very different. It also added redundant logging, as the individual calls it called all logged on their own anyway. The three calls bus_init_api(), bus_init_private() and bus_init_system() are now made public. A new call manager_dbus_is_running() is added that works much like manager_journal_is_running() and is a lot more careful when checking whether dbus is around. Optionally it checks the unit's deserialized_state rather than state, in order to accomodate for cases where we cant to connect to the bus before deserializing the "subscribed" list, before coldplugging the units. manager_recheck_dbus() is added, that works a lot like manager_recheck_journal() and is invoked in unit_notify(), i.e. when units change state. All in all this should make handling a bit more alike to journal handling, and it also fixes one major bug: when running in user mode we'll now connect to the system bus early on, without conditionalizing this in anyway. --- src/core/dbus.c | 26 ++--------- src/core/dbus.h | 4 +- src/core/manager.c | 111 +++++++++++++++++++++++++++++++-------------- src/core/manager.h | 1 + src/core/unit.c | 28 ++++-------- 5 files changed, 93 insertions(+), 77 deletions(-) diff --git a/src/core/dbus.c b/src/core/dbus.c index c46c299207..65079135df 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -846,7 +846,7 @@ static int bus_setup_api(Manager *m, sd_bus *bus) { return 0; } -static int bus_init_api(Manager *m) { +int bus_init_api(Manager *m) { _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; int r; @@ -907,7 +907,7 @@ static int bus_setup_system(Manager *m, sd_bus *bus) { return 0; } -static int bus_init_system(Manager *m) { +int bus_init_system(Manager *m) { _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; int r; @@ -942,7 +942,7 @@ static int bus_init_system(Manager *m) { return 0; } -static int bus_init_private(Manager *m) { +int bus_init_private(Manager *m) { _cleanup_close_ int fd = -1; union sockaddr_union sa = { .un.sun_family = AF_UNIX @@ -1014,26 +1014,6 @@ static int bus_init_private(Manager *m) { return 0; } -int bus_init(Manager *m, bool try_bus_connect) { - int r; - - if (try_bus_connect) { - r = bus_init_system(m); - if (r < 0) - return log_error_errno(r, "Failed to initialize D-Bus connection: %m"); - - r = bus_init_api(m); - if (r < 0) - return log_error_errno(r, "Error occured during D-Bus APIs initialization: %m"); - } - - r = bus_init_private(m); - if (r < 0) - return log_error_errno(r, "Failed to create private D-Bus server: %m"); - - return 0; -} - static void destroy_bus(Manager *m, sd_bus **bus) { Iterator i; Unit *u; diff --git a/src/core/dbus.h b/src/core/dbus.h index 17dfbc9f97..fb4988dace 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -24,7 +24,9 @@ int bus_send_queued_message(Manager *m); -int bus_init(Manager *m, bool try_bus_connect); +int bus_init_private(Manager *m); +int bus_init_api(Manager *m); +int bus_init_system(Manager *m); void bus_done_private(Manager *m); void bus_done_api(Manager *m); diff --git a/src/core/manager.c b/src/core/manager.c index b54c021c75..8ae70b648c 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -985,26 +985,6 @@ static int manager_setup_user_lookup_fd(Manager *m) { return 0; } -static int manager_connect_bus(Manager *m, bool reexecuting) { - bool try_bus_connect; - Unit *u = NULL; - - assert(m); - - if (m->test_run_flags) - return 0; - - u = manager_get_unit(m, SPECIAL_DBUS_SERVICE); - - try_bus_connect = - (u && SERVICE(u)->deserialized_state == SERVICE_RUNNING) && - (reexecuting || - (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"))); - - /* Try to connect to the buses, if possible. */ - return bus_init(m, try_bus_connect); -} - static unsigned manager_dispatch_cleanup_queue(Manager *m) { Unit *u; unsigned n = 0; @@ -1374,6 +1354,38 @@ static void manager_distribute_fds(Manager *m, FDSet *fds) { } } +static bool manager_dbus_is_running(Manager *m, bool deserialized) { + Unit *u; + + assert(m); + + /* This checks whether the dbus instance we are supposed to expose our APIs on is up. We check both the socket + * and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit + * rather than the current one. */ + + if (m->test_run_flags != 0) + return false; + + /* If we are in the user instance, and the env var is already set for us, then this means D-Bus is ran + * somewhere outside of our own logic. Let's use it */ + if (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")) + return true; + + u = manager_get_unit(m, SPECIAL_DBUS_SOCKET); + if (!u) + return false; + if ((deserialized ? SOCKET(u)->deserialized_state : SOCKET(u)->state) != SOCKET_RUNNING) + return false; + + u = manager_get_unit(m, SPECIAL_DBUS_SERVICE); + if (!u) + return false; + if (!IN_SET((deserialized ? SERVICE(u)->deserialized_state : SERVICE(u)->state), SERVICE_RUNNING, SERVICE_RELOAD)) + return false; + + return true; +} + int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { int r; @@ -1454,9 +1466,22 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { /* This shouldn't fail, except if things are really broken. */ return r; - /* Let's connect to the bus now. */ - (void) manager_connect_bus(m, !!serialization); + /* Let's set up our private bus connection now, unconditionally */ + (void) bus_init_private(m); + /* If we are in --user mode also connect to the system bus now */ + if (MANAGER_IS_USER(m)) + (void) bus_init_system(m); + + /* Let's connect to the bus now, but only if the unit is supposed to be up */ + if (manager_dbus_is_running(m, !!serialization)) { + (void) bus_init_api(m); + + if (MANAGER_IS_SYSTEM(m)) + (void) bus_init_system(m); + } + + /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */ (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); m->deserialized_subscribed = strv_free(m->deserialized_subscribed); @@ -2295,23 +2320,21 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t /* This is a nop on non-init */ break; - case SIGUSR1: { - Unit *u; + case SIGUSR1: - u = manager_get_unit(m, SPECIAL_DBUS_SERVICE); - - if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) { + if (manager_dbus_is_running(m, false)) { log_info("Trying to reconnect to bus..."); - bus_init(m, true); - } - if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) { - log_info("Loading D-Bus service..."); + (void) bus_init_api(m); + + if (MANAGER_IS_SYSTEM(m)) + (void) bus_init_system(m); + } else { + log_info("Starting D-Bus service..."); manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE); } break; - } case SIGUSR2: { _cleanup_free_ char *dump = NULL; @@ -3154,8 +3177,9 @@ int manager_reload(Manager *m) { exec_runtime_vacuum(m); - /* It might be safe to log to the journal now. */ + /* It might be safe to log to the journal now and connect to dbus */ manager_recheck_journal(m); + manager_recheck_dbus(m); /* Sync current state of bus names with our set of listening units */ if (m->api_bus) @@ -3519,6 +3543,27 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) { return 0; } +void manager_recheck_dbus(Manager *m) { + assert(m); + + /* Connects to the bus if the dbus service and socket are running. If we are running in user mode this is all + * it does. In system mode we'll also connect to the system bus (which will most likely just reuse the + * connection of the API bus). That's because the system bus after all runs as service of the system instance, + * while in the user instance we can assume it's already there. */ + + if (manager_dbus_is_running(m, false)) { + (void) bus_init_api(m); + + if (MANAGER_IS_SYSTEM(m)) + (void) bus_init_system(m); + } else { + (void) bus_done_api(m); + + if (MANAGER_IS_SYSTEM(m)) + (void) bus_done_system(m); + } +} + static bool manager_journal_is_running(Manager *m) { Unit *u; diff --git a/src/core/manager.h b/src/core/manager.h index fc70593def..b731e6e8f1 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -425,6 +425,7 @@ bool manager_unit_inactive_or_pending(Manager *m, const char *name); void manager_check_finished(Manager *m); +void manager_recheck_dbus(Manager *m); void manager_recheck_journal(Manager *m); void manager_set_show_status(Manager *m, ShowStatus mode); diff --git a/src/core/unit.c b/src/core/unit.c index 0ad7dc108b..47a06e4297 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2326,18 +2326,16 @@ static void unit_update_on_console(Unit *u) { } void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) { - Manager *m; bool unexpected; + Manager *m; assert(u); assert(os < _UNIT_ACTIVE_STATE_MAX); assert(ns < _UNIT_ACTIVE_STATE_MAX); - /* Note that this is called for all low-level state changes, - * even if they might map to the same high-level - * UnitActiveState! That means that ns == os is an expected - * behavior here. For example: if a mount point is remounted - * this function will be called too! */ + /* Note that this is called for all low-level state changes, even if they might map to the same high-level + * UnitActiveState! That means that ns == os is an expected behavior here. For example: if a mount point is + * remounted this function will be called too! */ m = u->manager; @@ -2463,12 +2461,6 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su /* Some names are special */ if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) { - if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) - /* The bus might have just become available, - * hence try to connect to it, if we aren't - * yet connected. */ - bus_init(m, true); - if (u->type == UNIT_SERVICE && !UNIT_IS_ACTIVE_OR_RELOADING(os) && !MANAGER_IS_RELOADING(m)) { @@ -2481,8 +2473,6 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su manager_send_unit_plymouth(m, u); } else { - /* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it - * anyway. */ if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os) @@ -2512,17 +2502,15 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su } manager_recheck_journal(m); + manager_recheck_dbus(m); unit_trigger_notify(u); if (!MANAGER_IS_RELOADING(u->manager)) { - /* Maybe we finished startup and are now ready for - * being stopped because unneeded? */ + /* Maybe we finished startup and are now ready for being stopped because unneeded? */ unit_check_unneeded(u); - /* Maybe we finished startup, but something we needed - * has vanished? Let's die then. (This happens when - * something BindsTo= to a Type=oneshot unit, as these - * units go directly from starting to inactive, + /* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when + * something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive, * without ever entering started.) */ unit_check_binds_to(u); From 7d814a197a11077f7f5a54a9719f91150a10572e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 15:06:15 +0100 Subject: [PATCH 08/21] manager: tweak manager_journal_is_running() a bit regarding test mode In test mode, let's not consider the journal to be up ever: we want all output to go to stderr. --- src/core/manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/manager.c b/src/core/manager.c index 8ae70b648c..30a020f777 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3569,6 +3569,9 @@ static bool manager_journal_is_running(Manager *m) { assert(m); + if (m->test_run_flags != 0) + return false; + /* If we are the user manager we can safely assume that the journal is up */ if (!MANAGER_IS_SYSTEM(m)) return true; From 217677abb0fa31d85656f35d6a8f4a7632d54173 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 15:07:00 +0100 Subject: [PATCH 09/21] core: tweak manager_journal_is_running() a bit more Let's also use the journal if it is currently reloading. In that state it should also be able to process our requests. Moreover, we might otherwise end up disconnecting/reconnecting from the journal without really any need to hence, relax the check accordingly. --- src/core/manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/manager.c b/src/core/manager.c index 30a020f777..9c68eb49d1 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3587,7 +3587,7 @@ static bool manager_journal_is_running(Manager *m) { u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE); if (!u) return false; - if (SERVICE(u)->state != SERVICE_RUNNING) + if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING)) return false; return true; From cedf50888660476ebc5a239fbcfac665574bc0b3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 15:08:18 +0100 Subject: [PATCH 10/21] core: simplify manager_recheck_journal() a bit No need for an if check if we just pass along a bool anyway. --- src/core/manager.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index 9c68eb49d1..e83a0dd5e6 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3601,16 +3601,10 @@ void manager_recheck_journal(Manager *m) { if (getpid_cached() != 1) return; - if (manager_journal_is_running(m)) { - - /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. */ - log_set_prohibit_ipc(false); - } else { - - /* If the journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we - * might trigger an activation ourselves we can't fulfill */ - log_set_prohibit_ipc(true); - } + /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the + * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we might trigger + * an activation ourselves we can't fulfill. */ + log_set_prohibit_ipc(!manager_journal_is_running(m)); log_open(); } From a6011d1887f30867c8e8236605fdb2bbd2c386a5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 16:30:11 +0100 Subject: [PATCH 11/21] core: update dbus policy file This patch does four things: 1. Adds more comments that clarify the order in which things appear in the file 2. All entries are placed in the order in which their SD_BUS_METHOD() macros appear in the C vtables. 3. A couple of missing entries are added that should be open to all or do polkit 4. Corrects the interface name for the GetProcesses() calls. They belong to the per-unit interface, not to Unit --- src/core/org.freedesktop.systemd1.conf | 162 ++++++++++++++++++++----- 1 file changed, 130 insertions(+), 32 deletions(-) diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index 97f6094b66..a97edac4ac 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -30,7 +30,7 @@ - + @@ -46,6 +46,8 @@ send_interface="org.freedesktop.DBus.Properties" send_member="GetAll"/> + + @@ -62,6 +64,10 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="LoadUnit"/> + + @@ -88,23 +94,7 @@ - - - - - - - - + send_member="ListUnitsByNames"/> + + + + + + + + @@ -134,7 +140,43 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="LookupDynamicUserByUID"/> - + + + + + + + + + + + + + + + + + + + + + + + + + + send_member="RefUnit"/> + + + + + + @@ -200,14 +254,6 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="Reexecute"/> - - - - @@ -224,10 +270,6 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="LinkUnitFiles"/> - - @@ -244,6 +286,10 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="UnmaskUnitFiles"/> + + @@ -256,6 +302,8 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="AddDependencyUnitFiles"/> + + @@ -268,6 +316,56 @@ send_interface="org.freedesktop.systemd1.Job" send_member="GetBefore"/> + + + + + + + + + + + + + + + + + + + + + + + + + + From 7cb609115c532c3591d43a604d67d72e508ba5d9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 18:11:18 +0100 Subject: [PATCH 12/21] user-util: also consider /bin/false and /bin/true as non-shell --- src/basic/user-util.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/basic/user-util.c b/src/basic/user-util.c index db18ee31c0..ceb71b61e8 100644 --- a/src/basic/user-util.c +++ b/src/basic/user-util.c @@ -197,6 +197,25 @@ int get_user_creds( return 0; } +static inline bool is_nologin_shell(const char *shell) { + + return PATH_IN_SET(shell, + /* 'nologin' is the friendliest way to disable logins for a user account. It prints a nice + * message and exits. Different distributions place the binary at different places though, + * hence let's list them all. */ + "/bin/nologin", + "/sbin/nologin", + "/usr/bin/nologin", + "/usr/sbin/nologin", + /* 'true' and 'false' work too for the same purpose, but are less friendly as they don't do + * any message printing. Different distributions place the binary at various places but at + * least not in the 'sbin' directory. */ + "/bin/false", + "/usr/bin/false", + "/bin/true", + "/usr/bin/true"); +} + int get_user_creds_clean( const char **username, uid_t *uid, gid_t *gid, @@ -212,11 +231,7 @@ int get_user_creds_clean( return r; if (shell && - (isempty(*shell) || PATH_IN_SET(*shell, - "/bin/nologin", - "/sbin/nologin", - "/usr/bin/nologin", - "/usr/sbin/nologin"))) + (isempty(*shell) || is_nologin_shell(*shell))) *shell = NULL; if (home && From 96cc44539b832d2666e1c286588a6d17758ce208 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 18:13:31 +0100 Subject: [PATCH 13/21] core: generalize how we acquire the Unit objects for unit names in bus calls This splits out the code that translates a unit name into a Unit* object from method_get_unit(), and reuses it all other functions that operate similar to it. This effectively means all those calls now optionally take an empty unit string which now means the same as the client's unit. This useful behaviour of the GetUnit() bus call is thus extended to all other matching bus calls. Similar, the same logic from method_load_unit() is also generalized and reused wherever appropriate. --- src/core/dbus-manager.c | 107 +++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 4fe374867c..945645c6b1 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -372,21 +372,16 @@ static int property_get_timer_slack_nsec( return sd_bus_message_append(reply, "t", (uint64_t) prctl(PR_GET_TIMERSLACK)); } -static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { - _cleanup_free_ char *path = NULL; - Manager *m = userdata; - const char *name; +static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) { Unit *u; int r; - assert(message); assert(m); + assert(message); + assert(ret_unit); - /* Anyone can call this method */ - - r = sd_bus_message_read(message, "s", &name); - if (r < 0) - return r; + /* More or less a wrapper around manager_get_unit() that generates nice errors and has one trick up its sleeve: + * if the name is specified empty we use the client's unit. */ if (isempty(name)) { _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; @@ -409,6 +404,43 @@ static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", name); } + *ret_unit = u; + return 0; +} + +static int bus_load_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) { + assert(m); + assert(message); + assert(ret_unit); + + /* Pretty much the same as bus_get_unit_by_name(), but we also load the unit if necessary. */ + + if (isempty(name)) + return bus_get_unit_by_name(m, message, name, ret_unit, error); + + return manager_load_unit(m, name, NULL, error, ret_unit); +} + +static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *path = NULL; + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + /* Anyone can call this method */ + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; + r = mac_selinux_unit_access_check(u, message, "status", error); if (r < 0) return r; @@ -541,26 +573,9 @@ static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_erro if (r < 0) return r; - if (isempty(name)) { - _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; - pid_t pid; - - r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds); - if (r < 0) - return r; - - r = sd_bus_creds_get_pid(creds, &pid); - if (r < 0) - return r; - - u = manager_get_unit_by_pid(m, pid); - if (!u) - return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client not member of any unit."); - } else { - r = manager_load_unit(m, name, NULL, error, &u); - if (r < 0) - return r; - } + r = bus_load_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; r = mac_selinux_unit_access_check(u, message, "status", error); if (r < 0) @@ -633,8 +648,10 @@ static int method_start_unit_replace(sd_bus_message *message, void *userdata, sd if (r < 0) return r; - u = manager_get_unit(m, old_name); - if (!u || !u->job || u->job->type != JOB_START) + r = bus_get_unit_by_name(m, message, old_name, &u, error); + if (r < 0) + return r; + if (!u->job || u->job->type != JOB_START) return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "No job queued for unit %s", old_name); return method_start_unit_generic(message, m, JOB_START, false, error); @@ -653,9 +670,9 @@ static int method_kill_unit(sd_bus_message *message, void *userdata, sd_bus_erro if (r < 0) return r; - u = manager_get_unit(m, name); - if (!u) - return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; return bus_unit_method_kill(message, u, error); } @@ -673,9 +690,9 @@ static int method_reset_failed_unit(sd_bus_message *message, void *userdata, sd_ if (r < 0) return r; - u = manager_get_unit(m, name); - if (!u) - return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; return bus_unit_method_reset_failed(message, u, error); } @@ -693,7 +710,7 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s if (r < 0) return r; - r = manager_load_unit(m, name, NULL, error, &u); + r = bus_load_unit_by_name(m, message, name, &u, error); if (r < 0) return r; @@ -717,7 +734,7 @@ static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error if (r < 0) return r; - r = manager_load_unit(m, name, NULL, error, &u); + r = bus_load_unit_by_name(m, message, name, &u, error); if (r < 0) return r; @@ -741,7 +758,7 @@ static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_err if (r < 0) return r; - r = manager_load_unit(m, name, NULL, error, &u); + r = bus_load_unit_by_name(m, message, name, &u, error); if (r < 0) return r; @@ -810,7 +827,7 @@ static int method_list_units_by_names(sd_bus_message *message, void *userdata, s if (!unit_name_is_valid(*unit, UNIT_NAME_ANY)) continue; - r = manager_load_unit(m, *unit, NULL, error, &u); + r = bus_load_unit_by_name(m, message, *unit, &u, error); if (r < 0) return r; @@ -839,9 +856,9 @@ static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd if (r < 0) return r; - u = manager_get_unit(m, name); - if (!u) - return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", name); + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; return bus_unit_method_get_processes(message, u, error); } From 201e419aea8d9c6170091e1e4eefa6999e7d3e0b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:28:42 +0100 Subject: [PATCH 14/21] sd-bus: synthesize a description for user/system bus if otherwise unset Let's make debugging easier, by synthesizing a name when we have some indication what kind of bus this is. --- src/libsystemd/sd-bus/sd-bus.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c index c69c596c59..cbbc6b6f0c 100644 --- a/src/libsystemd/sd-bus/sd-bus.c +++ b/src/libsystemd/sd-bus/sd-bus.c @@ -3920,7 +3920,15 @@ _public_ int sd_bus_get_description(sd_bus *bus, const char **description) { assert_return(bus->description, -ENXIO); assert_return(!bus_pid_changed(bus), -ECHILD); - *description = bus->description; + if (bus->description) + *description = bus->description; + else if (bus->is_system) + *description = "system"; + else if (bus->is_user) + *description = "user"; + else + *description = NULL; + return 0; } From 030fa56c6eb1d9bd4c12ba4604934c592dcf180e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:30:17 +0100 Subject: [PATCH 15/21] core: set a description on private bus connections Let's make things easier to debug --- src/core/dbus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/dbus.c b/src/core/dbus.c index 65079135df..87739f1b74 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -653,6 +653,8 @@ static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void return 0; } + (void) sd_bus_set_description(bus, "private-bus-connection"); + r = sd_bus_set_fd(bus, nfd, nfd); if (r < 0) { log_warning_errno(r, "Failed to set fd on new connection bus: %m"); From 5f109056d536849f6079df9321042cf4838a41d3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:36:51 +0100 Subject: [PATCH 16/21] core: delay bus name synchronization after reload/reexec into a later event loop iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we'd synchronize bus names immediately when we succeeded connecting to the bus, potentially even before coldplugging the units. This was problematic, as synchronizing bus names meant invoking the per-unit name change handler function which might change the unit's state — which will result in consistency when done before we coldplug things. With this change we instead enqueue a job for the event loop to resync the names in a later loop iteration, i.e. at a point where we know coldplugging has finished. --- src/core/dbus.c | 57 +++++++++++++++++++++++++++++++++++++++------- src/core/dbus.h | 2 +- src/core/manager.c | 6 +++-- src/core/manager.h | 2 ++ 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/core/dbus.c b/src/core/dbus.c index 87739f1b74..5e6dd0dee5 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -727,17 +727,29 @@ static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void return 0; } -int manager_sync_bus_names(Manager *m, sd_bus *bus) { +static int manager_dispatch_sync_bus_names(sd_event_source *es, void *userdata) { _cleanup_strv_free_ char **names = NULL; + Manager *m = userdata; const char *name; Iterator i; Unit *u; int r; + assert(es); assert(m); - assert(bus); + assert(m->sync_bus_names_event_source == es); - r = sd_bus_list_names(bus, &names, NULL); + /* First things first, destroy the defer event so that we aren't triggered again */ + m->sync_bus_names_event_source = sd_event_source_unref(m->sync_bus_names_event_source); + + /* Let's see if there's anything to do still? */ + if (!m->api_bus) + return 0; + if (hashmap_isempty(m->watch_bus)) + return 0; + + /* OK, let's sync up the names. Let's see which names are currently on the bus. */ + r = sd_bus_list_names(m->api_bus, &names, NULL); if (r < 0) return log_error_errno(r, "Failed to get initial list of names: %m"); @@ -761,7 +773,7 @@ int manager_sync_bus_names(Manager *m, sd_bus *bus) { const char *unique; /* If it is, determine its current owner */ - r = sd_bus_get_name_creds(bus, name, SD_BUS_CREDS_UNIQUE_NAME, &creds); + r = sd_bus_get_name_creds(m->api_bus, name, SD_BUS_CREDS_UNIQUE_NAME, &creds); if (r < 0) { log_full_errno(r == -ENXIO ? LOG_DEBUG : LOG_ERR, r, "Failed to get bus name owner %s: %m", name); continue; @@ -795,6 +807,34 @@ int manager_sync_bus_names(Manager *m, sd_bus *bus) { return 0; } +int manager_enqueue_sync_bus_names(Manager *m) { + int r; + + assert(m); + + /* Enqueues a request to synchronize the bus names in a later event loop iteration. The callers generally don't + * want us to invoke ->bus_name_owner_change() unit calls from their stack frames as this might result in event + * dispatching on its own creating loops, hence we simply create a defer event for the event loop and exit. */ + + if (m->sync_bus_names_event_source) + return 0; + + r = sd_event_add_defer(m->event, &m->sync_bus_names_event_source, manager_dispatch_sync_bus_names, m); + if (r < 0) + return log_error_errno(r, "Failed to create bus name synchronization event: %m"); + + r = sd_event_source_set_priority(m->sync_bus_names_event_source, SD_EVENT_PRIORITY_IDLE); + if (r < 0) + return log_error_errno(r, "Failed to set event priority: %m"); + + r = sd_event_source_set_enabled(m->sync_bus_names_event_source, SD_EVENT_ONESHOT); + if (r < 0) + return log_error_errno(r, "Failed to set even to oneshot: %m"); + + (void) sd_event_source_set_description(m->sync_bus_names_event_source, "manager-sync-bus-names"); + return 0; +} + static int bus_setup_api(Manager *m, sd_bus *bus) { Iterator i; char *name; @@ -840,11 +880,8 @@ static int bus_setup_api(Manager *m, sd_bus *bus) { if (r < 0) return log_error_errno(r, "Failed to request name: %m"); - r = manager_sync_bus_names(m, bus); - if (r < 0) - return r; - log_debug("Successfully connected to API bus."); + return 0; } @@ -882,6 +919,10 @@ int bus_init_api(Manager *m) { m->api_bus = bus; bus = NULL; + r = manager_enqueue_sync_bus_names(m); + if (r < 0) + return r; + return 0; } diff --git a/src/core/dbus.h b/src/core/dbus.h index fb4988dace..3051ec0665 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -38,7 +38,7 @@ int bus_fdset_add_all(Manager *m, FDSet *fds); void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix); int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l); -int manager_sync_bus_names(Manager *m, sd_bus *bus); +int manager_enqueue_sync_bus_names(Manager *m); int bus_foreach_bus(Manager *m, sd_bus_track *subscribed2, int (*send_message)(sd_bus *bus, void *userdata), void *userdata); diff --git a/src/core/manager.c b/src/core/manager.c index e83a0dd5e6..3e3cc7ed80 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1203,6 +1203,7 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); sd_event_source_unref(m->user_lookup_event_source); + sd_event_source_unref(m->sync_bus_names_event_source); safe_close(m->signal_fd); safe_close(m->notify_fd); @@ -3182,8 +3183,9 @@ int manager_reload(Manager *m) { manager_recheck_dbus(m); /* Sync current state of bus names with our set of listening units */ - if (m->api_bus) - manager_sync_bus_names(m, m->api_bus); + q = manager_enqueue_sync_bus_names(m); + if (q < 0 && r >= 0) + r = q; assert(m->n_reloading > 0); m->n_reloading--; diff --git a/src/core/manager.h b/src/core/manager.h index b731e6e8f1..d4eaaa1c4b 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -182,6 +182,8 @@ struct Manager { int user_lookup_fds[2]; sd_event_source *user_lookup_event_source; + sd_event_source *sync_bus_names_event_source; + UnitFileScope unit_file_scope; LookupPaths lookup_paths; Set *unit_path_cache; From dfeff66499e4e116e6debae765410eba9ba46d51 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:40:41 +0100 Subject: [PATCH 17/21] bus: when destroying a bus, also destroy per-unit bus track objects associated with it Let's not keep the old bus object pinned this way, let's destroy all relevant trackers for units, the way we already destroy them for jobs. --- src/core/dbus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/dbus.c b/src/core/dbus.c index 5e6dd0dee5..c7c96a3409 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -1087,6 +1087,10 @@ static void destroy_bus(Manager *m, sd_bus **bus) { if (j->bus_track && sd_bus_track_get_bus(j->bus_track) == *bus) j->bus_track = sd_bus_track_unref(j->bus_track); + HASHMAP_FOREACH(u, m->units, i) + if (u->bus_track && sd_bus_track_get_bus(u->bus_track) == *bus) + u->bus_track = sd_bus_track_unref(u->bus_track); + /* Get rid of queued message on this bus */ if (m->queued_message && sd_bus_message_get_bus(m->queued_message) == *bus) m->queued_message = sd_bus_message_unref(m->queued_message); From 6edd281cb88e1efc6253b418d03fdbf1d05b0af3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:44:18 +0100 Subject: [PATCH 18/21] bus: in bus_foreach_bus() don't bother with api_bus if it is NULL Let's better be safe than sorry, and validate that api_bus is not NULL before we send messages to it. Of course, strictly speaking this shouldn't actually be necessary, as the tracker object should not exist without the bus, but let's be extra sure. --- src/core/dbus.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/dbus.c b/src/core/dbus.c index c7c96a3409..04bed73b65 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -1204,8 +1204,9 @@ int bus_foreach_bus( } /* Send to API bus, but only if somebody is subscribed */ - if (sd_bus_track_count(m->subscribed) > 0 || - sd_bus_track_count(subscribed2) > 0) { + if (m->api_bus && + (sd_bus_track_count(m->subscribed) > 0 || + sd_bus_track_count(subscribed2) > 0)) { r = send_message(m->api_bus, userdata); if (r < 0) ret = r; From 931e47547d97e2d72e748d8147f9adba6113473b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:47:26 +0100 Subject: [PATCH 19/21] core: in bus_init_system() make sure we setup the system bus even if we inherit the API bus This corrects the control flow: when we reuse the API bus as system bus, let's definitely invoke bus_init_system() too, so that it is called regardless how we acquired the bus object. (Note that this doesn't actually change anything, as we only inherit the bus like this in system mode, and bus_init_system() doesn't do anything in system bus, besides writing a log message) --- src/core/dbus.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/core/dbus.c b/src/core/dbus.c index 04bed73b65..305824da9b 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -958,23 +958,22 @@ int bus_init_system(Manager *m) { return 0; /* The API and system bus is the same if we are running in system mode */ - if (MANAGER_IS_SYSTEM(m) && m->api_bus) { - m->system_bus = sd_bus_ref(m->api_bus); - return 0; + if (MANAGER_IS_SYSTEM(m) && m->api_bus) + bus = sd_bus_ref(m->api_bus); + else { + r = sd_bus_open_system(&bus); + if (r < 0) + return log_error_errno(r, "Failed to connect to system bus: %m"); + + r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return log_error_errno(r, "Failed to attach system bus to event loop: %m"); + + r = bus_setup_disconnected_match(m, bus); + if (r < 0) + return r; } - r = sd_bus_open_system(&bus); - if (r < 0) - return log_error_errno(r, "Failed to connect to system bus: %m"); - - r = bus_setup_disconnected_match(m, bus); - if (r < 0) - return r; - - r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL); - if (r < 0) - return log_error_errno(r, "Failed to attach system bus to event loop: %m"); - r = bus_setup_system(m, bus); if (r < 0) return log_error_errno(r, "Failed to set up system bus: %m"); From 6592b9759cae509b407a3b49603498468bf5d276 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 22:52:52 +0100 Subject: [PATCH 20/21] core: add new new bus call for migrating foreign processes to scope/service units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new bus call to service and scope units called AttachProcesses() that moves arbitrary processes into the cgroup of the unit. The primary user for this new API is systemd itself: the systemd --user instance uses this call of the systemd --system instance to migrate processes if itself gets the request to migrate processes and the kernel refuses this due to access restrictions. The primary use-case of this is to make "systemd-run --scope --user …" invoked from user session scopes work correctly on pure cgroupsv2 environments. There, the kernel refuses to migrate processes between two unprivileged-owned cgroups unless the requestor as well as the ownership of the closest parent cgroup all match. This however is not the case between the session-XYZ.scope unit of a login session and the user@ABC.service of the systemd --user instance. The new logic always tries to move the processes on its own, but if that doesn't work when being the user manager, then the system manager is asked to do it instead. The new operation is relatively restrictive: it will only allow to move the processes like this if the caller is root, or the UID of the target unit, caller and process all match. Note that this means that unprivileged users cannot attach processes to scope units, as those do not have "owning" users (i.e. they have now User= field). Fixes: #3388 --- src/core/cgroup.c | 145 +++++++++++++++++++++++-- src/core/cgroup.h | 3 +- src/core/dbus-manager.c | 21 ++++ src/core/dbus-scope.c | 32 +++++- src/core/dbus-unit.c | 113 +++++++++++++++++++ src/core/dbus-unit.h | 1 + src/core/org.freedesktop.systemd1.conf | 16 +++ src/core/scope.c | 2 +- src/core/unit.c | 28 +++++ src/core/unit.h | 2 + 10 files changed, 346 insertions(+), 17 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index d05e338d11..edb702ce48 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -24,6 +24,7 @@ #include "alloc-util.h" #include "blockdev-util.h" #include "bpf-firewall.h" +#include "bus-error.h" #include "cgroup-util.h" #include "cgroup.h" #include "fd-util.h" @@ -1303,13 +1304,12 @@ void unit_update_cgroup_members_masks(Unit *u) { } } -static const char *migrate_callback(CGroupMask mask, void *userdata) { - Unit *u = userdata; +const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) { - assert(mask != 0); - assert(u); + /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */ while (u) { + if (u->cgroup_path && u->cgroup_realized && (u->cgroup_realized_mask & mask) == mask) @@ -1321,6 +1321,10 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) { return NULL; } +static const char *migrate_callback(CGroupMask mask, void *userdata) { + return unit_get_realized_cgroup_path(userdata, mask); +} + char *unit_default_cgroup_path(Unit *u) { _cleanup_free_ char *escaped = NULL, *slice = NULL; int r; @@ -1503,19 +1507,142 @@ static int unit_create_cgroup( return 0; } -int unit_attach_pids_to_cgroup(Unit *u) { +static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + char *pp; int r; + assert(u); + if (MANAGER_IS_SYSTEM(u->manager)) + return -EINVAL; + + if (!u->manager->system_bus) + return -EIO; + + if (!u->cgroup_path) + return -EINVAL; + + /* Determine this unit's cgroup path relative to our cgroup root */ + pp = path_startswith(u->cgroup_path, u->manager->cgroup_root); + if (!pp) + return -EINVAL; + + pp = strjoina("/", pp, suffix_path); + path_kill_slashes(pp); + + r = sd_bus_call_method(u->manager->system_bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "AttachProcessesToUnit", + &error, NULL, + "ssau", + NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid); + if (r < 0) + return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r)); + + return 0; +} + +int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) { + CGroupMask delegated_mask; + const char *p; + Iterator i; + void *pidp; + int r, q; + + assert(u); + + if (!UNIT_HAS_CGROUP_CONTEXT(u)) + return -EINVAL; + + if (set_isempty(pids)) + return 0; + r = unit_realize_cgroup(u); if (r < 0) return r; - r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u); - if (r < 0) - return r; + if (isempty(suffix_path)) + p = u->cgroup_path; + else + p = strjoina(u->cgroup_path, "/", suffix_path); - return 0; + delegated_mask = unit_get_delegate_mask(u); + + r = 0; + SET_FOREACH(pidp, pids, i) { + pid_t pid = PTR_TO_PID(pidp); + CGroupController c; + + /* First, attach the PID to the main cgroup hierarchy */ + q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid); + if (q < 0) { + log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p); + + if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) { + int z; + + /* If we are in a user instance, and we can't move the process ourselves due to + * permission problems, let's ask the system instance about it instead. Since it's more + * privileged it might be able to move the process across the leaves of a subtree who's + * top node is not owned by us. */ + + z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path); + if (z < 0) + log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p); + else + continue; /* When the bus thing worked via the bus we are fully done for this PID. */ + } + + if (r >= 0) + r = q; /* Remember first error */ + + continue; + } + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + continue; + + /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the + * innermost realized one */ + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *realized; + + if (!(u->manager->cgroup_supported & bit)) + continue; + + /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */ + if (delegated_mask & u->cgroup_realized_mask & bit) { + q = cg_attach(cgroup_controller_to_string(c), p, pid); + if (q >= 0) + continue; /* Success! */ + + log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m", + pid, p, cgroup_controller_to_string(c)); + } + + /* So this controller is either not delegate or realized, or something else weird happened. In + * that case let's attach the PID at least to the closest cgroup up the tree that is + * realized. */ + realized = unit_get_realized_cgroup_path(u, bit); + if (!realized) + continue; /* Not even realized in the root slice? Then let's not bother */ + + q = cg_attach(cgroup_controller_to_string(c), realized, pid); + if (q < 0) + log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m", + pid, realized, cgroup_controller_to_string(c)); + } + } + + return r; } static void cgroup_xattr_apply(Unit *u) { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 5113313405..e2e875d1c7 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -167,6 +167,7 @@ bool unit_get_needs_bpf(Unit *u); void unit_update_cgroup_members_masks(Unit *u); +const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); char *unit_default_cgroup_path(Unit *u); int unit_set_cgroup_path(Unit *u, const char *path); int unit_pick_cgroup_path(Unit *u); @@ -178,7 +179,7 @@ int unit_watch_cgroup(Unit *u); void unit_add_to_cgroup_empty_queue(Unit *u); -int unit_attach_pids_to_cgroup(Unit *u); +int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path); int manager_setup_cgroup(Manager *m); void manager_shutdown_cgroup(Manager *m, bool delete); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 945645c6b1..889779d548 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -863,6 +863,26 @@ static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd return bus_unit_method_get_processes(message, u, error); } +static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; + + return bus_unit_method_attach_processes(message, u, error); +} + static int transient_unit_from_message( Manager *m, sd_bus_message *message, @@ -2504,6 +2524,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index a0c4a65b33..4d9ced81de 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -89,17 +89,39 @@ static int bus_scope_set_transient_property( return bus_set_transient_usec(UNIT(s), name, &s->timeout_stop_usec, message, flags, error); if (streq(name, "PIDs")) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; unsigned n = 0; - uint32_t pid; r = sd_bus_message_enter_container(message, 'a', "u"); if (r < 0) return r; - while ((r = sd_bus_message_read(message, "u", &pid)) > 0) { + for (;;) { + uint32_t upid; + pid_t pid; - if (pid <= 1) - return -EINVAL; + r = sd_bus_message_read(message, "u", &upid); + if (r < 0) + return r; + if (r == 0) + break; + + if (upid == 0) { + if (!creds) { + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + } + + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + } else + pid = (uid_t) upid; + + r = unit_pid_attachable(UNIT(s), pid, error); + if (r < 0) + return r; if (!UNIT_WRITE_FLAGS_NOOP(flags)) { r = unit_watch_pid(UNIT(s), pid); @@ -109,8 +131,6 @@ static int bus_scope_set_transient_property( n++; } - if (r < 0) - return r; r = sd_bus_message_exit_container(message); if (r < 0) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 7085eee930..50a5ab9819 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1127,6 +1127,118 @@ static int property_get_ip_counter( return sd_bus_message_append(reply, "t", value); } +int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) { + + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + _cleanup_(set_freep) Set *pids = NULL; + Unit *u = userdata; + const char *path; + int r; + + assert(message); + + /* This migrates the processes with the specified PIDs into the cgroup of this unit, optionally below a + * specified cgroup path. Obviously this only works for units that actually maintain a cgroup + * representation. If a process is already in the cgroup no operation is executed – in this case the specified + * subcgroup path has no effect! */ + + r = mac_selinux_unit_access_check(u, message, "start", error); + if (r < 0) + return r; + + r = sd_bus_message_read(message, "s", &path); + if (r < 0) + return r; + + path = empty_to_null(path); + if (path) { + if (!path_is_absolute(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not absolute: %s", path); + + if (!path_is_normalized(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not normalized: %s", path); + } + + if (!unit_cgroup_delegate(u)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process migration not available on non-delegated units."); + + if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u))) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not active, refusing."); + + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(message, 'a', "u"); + if (r < 0) + return r; + for (;;) { + uid_t process_uid, sender_uid; + uint32_t upid; + pid_t pid; + + r = sd_bus_message_read(message, "u", &upid); + if (r < 0) + return r; + if (r == 0) + break; + + if (upid == 0) { + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + } else + pid = (uid_t) upid; + + /* Filter out duplicates */ + if (set_contains(pids, PID_TO_PTR(pid))) + continue; + + /* Check if this process is suitable for attaching to this unit */ + r = unit_pid_attachable(u, pid, error); + if (r < 0) + return r; + + /* Let's query the sender's UID, so that we can make our security decisions */ + r = sd_bus_creds_get_euid(creds, &sender_uid); + if (r < 0) + return r; + + /* Let's validate security: if the sender is root, then all is OK. If the sender is is any other unit, + * then the process' UID and the target unit's UID have to match the sender's UID */ + if (sender_uid != 0 && sender_uid != getuid()) { + r = get_process_uid(pid, &process_uid); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to retrieve process UID: %m"); + + if (process_uid != sender_uid) + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pid); + if (process_uid != u->ref_uid) + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pid); + } + + if (!pids) { + pids = set_new(NULL); + if (!pids) + return -ENOMEM; + } + + r = set_put(pids, PID_TO_PTR(pid)); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + r = unit_attach_pids_to_cgroup(u, pids, path); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to attach processes to control group: %m"); + + return sd_bus_reply_method_return(message, NULL); +} + const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0), @@ -1139,6 +1251,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0), SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0), SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("AttachProcesses", "sau", NULL, bus_unit_method_attach_processes, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_VTABLE_END }; diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index d7066eefc8..bb4e43f5ca 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -37,6 +37,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitWriteFlags flags, bool commit, sd_bus_error *error); int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error); diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index a97edac4ac..645c8f1659 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -234,6 +234,10 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="StartTransientUnit"/> + + @@ -366,6 +370,18 @@ send_interface="org.freedesktop.systemd1.Unit" send_member="Unref"/> + + + + + + + + diff --git a/src/core/scope.c b/src/core/scope.c index 47c47bc708..5b9c2bb3c4 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -343,7 +343,7 @@ static int scope_start(Unit *u) { unit_export_state_files(UNIT(s)); - r = unit_attach_pids_to_cgroup(u); + r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL); if (r < 0) { log_unit_warning_errno(UNIT(s), r, "Failed to add PIDs to scope's control group: %m"); scope_enter_dead(s, SCOPE_FAILURE_RESOURCES); diff --git a/src/core/unit.c b/src/core/unit.c index 47a06e4297..26d97fa45e 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -5362,6 +5362,34 @@ const char *unit_label_path(Unit *u) { return p; } +int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) { + int r; + + assert(u); + + /* Checks whether the specified PID is generally good for attaching, i.e. a valid PID, not our manager itself, + * and not a kernel thread either */ + + /* First, a simple range check */ + if (!pid_is_valid(pid)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process identifier " PID_FMT " is not valid.", pid); + + /* Some extra safety check */ + if (pid == 1 || pid == getpid_cached()) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a manager processs, refusing.", pid); + + /* Don't even begin to bother with kernel threads */ + r = is_kernel_thread(pid); + if (r == -ESRCH) + return sd_bus_error_setf(error, SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "Process with ID " PID_FMT " does not exist.", pid); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to determine whether process " PID_FMT " is a kernel thread: %m", pid); + if (r > 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a kernel thread, refusing.", pid); + + return 0; +} + static const char* const collect_mode_table[_COLLECT_MODE_MAX] = { [COLLECT_INACTIVE] = "inactive", [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed", diff --git a/src/core/unit.h b/src/core/unit.h index 6d49c7d5f2..617f044e24 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -806,6 +806,8 @@ bool unit_needs_console(Unit *u); const char *unit_label_path(Unit *u); +int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error); + /* Macros which append UNIT= or USER_UNIT= to the message */ #define log_unit_full(unit, level, error, ...) \ From 1e78432157d14c967e2fd65a7822179cfebdfc62 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Feb 2018 23:03:13 +0100 Subject: [PATCH 21/21] update TODO --- TODO | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/TODO b/TODO index a6cdae84a0..17022d912b 100644 --- a/TODO +++ b/TODO @@ -24,8 +24,13 @@ Janitorial Clean-ups: Features: -* check what setting the login shell to /bin/false vs. /sbin/nologin means and - do the right thing in get_user_creds_clean() with it. +* block setrlimit(RLIMIT_NOPROC) (and other per-user limits) in nspawn when userns is not on + +* nss-systemd: implement enumeration, that shows all dynamic users plus the + synthesized ones if necessary, so that "getent passwd" shows useful data. + +* teach tmpfiles.d q/Q logic something sensible in the context of XFS/ext4 + project quota * maybe rework get_user_creds() to query the user database if $SHELL is used for root, but only then. @@ -378,8 +383,6 @@ Features: * what to do about udev db binary stability for apps? (raw access is not an option) -* maybe provide an API to allow migration of foreign PIDs into existing scopes. - * man: maybe use the word "inspect" rather than "introspect"? * systemctl: if some operation fails, show log output?