From a8ba86ce6a78f2bd80529e535af3494bf8297cfd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 13:19:17 +0200 Subject: [PATCH 01/20] man: always use the same example in nss module documentation Show the same recommended example file in all three man pages, just highlight the different, relevant parts. This should be less confusing for users, and clarify what we actually recommend how /etc/nsswitch.conf is set up. --- man/nss-myhostname.xml | 4 ++-- man/nss-resolve.xml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/nss-myhostname.xml b/man/nss-myhostname.xml index b7b7e1b555..4481fdf8cb 100644 --- a/man/nss-myhostname.xml +++ b/man/nss-myhostname.xml @@ -111,8 +111,8 @@ Here's an example /etc/nsswitch.conf file, that enables myhostname correctly: -passwd: compat -group: compat +passwd: compat mymachines +group: compat mymachines shadow: compat hosts: files resolve mymachines myhostname diff --git a/man/nss-resolve.xml b/man/nss-resolve.xml index dd402b359c..7d291b83c1 100644 --- a/man/nss-resolve.xml +++ b/man/nss-resolve.xml @@ -82,8 +82,8 @@ Here's an example /etc/nsswitch.conf file, that enables resolve correctly: -passwd: compat -group: compat +passwd: compat mymachines +group: compat mymachines shadow: compat hosts: files resolve mymachines myhostname From fea72cc0336f4d90875cdddc1aa9739dcbb174f5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 13:22:51 +0200 Subject: [PATCH 02/20] macro: introduce new PID_TO_PTR macros and make use of them This adds a new PID_TO_PTR() macro, plus PTR_TO_PID() and makes use of it wherever we maintain processes in a hash table. Previously we sometimes used LONG_TO_PTR() and other times ULONG_TO_PTR() for that, hence let's make this more explicit and clean up things. --- src/basic/cgroup-util.c | 10 +++++----- src/basic/macro.h | 3 +++ src/core/cgroup.c | 4 ++-- src/core/killall.c | 6 +++--- src/core/manager.c | 8 ++++---- src/core/unit.c | 20 ++++++++++---------- 6 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 0ebe570bb8..74e1668a17 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -187,7 +187,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo if (ignore_self && pid == my_pid) continue; - if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) continue; /* If we haven't killed this process yet, kill @@ -205,7 +205,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo done = false; - r = set_put(s, LONG_TO_PTR(pid)); + r = set_put(s, PID_TO_PTR(pid)); if (r < 0) { if (ret >= 0) return r; @@ -318,7 +318,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char if (ignore_self && pid == my_pid) continue; - if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) continue; /* Ignore kernel threads. Since they can only @@ -338,7 +338,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char done = false; - r = set_put(s, LONG_TO_PTR(pid)); + r = set_put(s, PID_TO_PTR(pid)); if (r < 0) { if (ret >= 0) return r; @@ -1898,7 +1898,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int r = 0; SET_FOREACH(pidp, pids, i) { - pid_t pid = PTR_TO_LONG(pidp); + pid_t pid = PTR_TO_PID(pidp); int q; q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); diff --git a/src/basic/macro.h b/src/basic/macro.h index 627d768b76..cbc3ca97b8 100644 --- a/src/basic/macro.h +++ b/src/basic/macro.h @@ -298,6 +298,9 @@ static inline unsigned long ALIGN_POWER2(unsigned long u) { #define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1)) #define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1)) +#define PTR_TO_PID(p) ((pid_t) ((uintptr_t) p)) +#define PID_TO_PTR(p) ((void*) ((uintptr_t) p)) + #define memzero(x,l) (memset((x), 0, (l))) #define zero(x) (memzero(&(x), sizeof(x))) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 1e78f871c7..9f06b28f26 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1391,11 +1391,11 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { if (pid == 1) return hashmap_get(m->units, SPECIAL_INIT_SCOPE); - u = hashmap_get(m->watch_pids1, LONG_TO_PTR(pid)); + u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid)); if (u) return u; - u = hashmap_get(m->watch_pids2, LONG_TO_PTR(pid)); + u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid)); if (u) return u; diff --git a/src/core/killall.c b/src/core/killall.c index 2a9d72c901..ee5d388560 100644 --- a/src/core/killall.c +++ b/src/core/killall.c @@ -108,7 +108,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { return; } - set_remove(pids, ULONG_TO_PTR(pid)); + (void) set_remove(pids, PID_TO_PTR(pid)); } /* Now explicitly check who might be remaining, who @@ -117,7 +117,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { /* We misuse getpgid as a check whether a * process still exists. */ - if (getpgid((pid_t) PTR_TO_ULONG(p)) >= 0) + if (getpgid(PTR_TO_PID(p)) >= 0) continue; if (errno != ESRCH) @@ -179,7 +179,7 @@ static int killall(int sig, Set *pids, bool send_sighup) { if (kill(pid, sig) >= 0) { if (pids) { - r = set_put(pids, ULONG_TO_PTR(pid)); + r = set_put(pids, PID_TO_PTR(pid)); if (r < 0) log_oom(); } diff --git a/src/core/manager.c b/src/core/manager.c index c3327e37f5..c2d262a5d3 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1591,13 +1591,13 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t found = true; } - u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid)); + u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); if (u2 && u2 != u1) { manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds); found = true; } - u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid)); + u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); if (u3 && u3 != u2 && u3 != u1) { manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds); found = true; @@ -1666,10 +1666,10 @@ static int manager_dispatch_sigchld(Manager *m) { u1 = manager_get_unit_by_pid(m, si.si_pid); if (u1) invoke_sigchld_event(m, u1, &si); - u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(si.si_pid)); + u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); if (u2 && u2 != u1) invoke_sigchld_event(m, u2, &si); - u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(si.si_pid)); + u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid)); if (u3 && u3 != u2 && u3 != u1) invoke_sigchld_event(m, u3, &si); } diff --git a/src/core/unit.c b/src/core/unit.c index 8c07c6140d..a5714adf38 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1995,16 +1995,16 @@ int unit_watch_pid(Unit *u, pid_t pid) { if (r < 0) return r; - r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u); + r = hashmap_put(u->manager->watch_pids1, PID_TO_PTR(pid), u); if (r == -EEXIST) { r = hashmap_ensure_allocated(&u->manager->watch_pids2, NULL); if (r < 0) return r; - r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u); + r = hashmap_put(u->manager->watch_pids2, PID_TO_PTR(pid), u); } - q = set_put(u->pids, LONG_TO_PTR(pid)); + q = set_put(u->pids, PID_TO_PTR(pid)); if (q < 0) return q; @@ -2015,16 +2015,16 @@ void unit_unwatch_pid(Unit *u, pid_t pid) { assert(u); assert(pid >= 1); - (void) hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u); - (void) hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u); - (void) set_remove(u->pids, LONG_TO_PTR(pid)); + (void) hashmap_remove_value(u->manager->watch_pids1, PID_TO_PTR(pid), u); + (void) hashmap_remove_value(u->manager->watch_pids2, PID_TO_PTR(pid), u); + (void) set_remove(u->pids, PID_TO_PTR(pid)); } void unit_unwatch_all_pids(Unit *u) { assert(u); while (!set_isempty(u->pids)) - unit_unwatch_pid(u, PTR_TO_LONG(set_first(u->pids))); + unit_unwatch_pid(u, PTR_TO_PID(set_first(u->pids))); u->pids = set_free(u->pids); } @@ -2038,7 +2038,7 @@ void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) { /* Cleans dead PIDs from our list */ SET_FOREACH(e, u->pids, i) { - pid_t pid = PTR_TO_LONG(e); + pid_t pid = PTR_TO_PID(e); if (pid == except1 || pid == except2) continue; @@ -2993,13 +2993,13 @@ static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) { /* Exclude the main/control pids from being killed via the cgroup */ if (main_pid > 0) { - r = set_put(pid_set, LONG_TO_PTR(main_pid)); + r = set_put(pid_set, PID_TO_PTR(main_pid)); if (r < 0) goto fail; } if (control_pid > 0) { - r = set_put(pid_set, LONG_TO_PTR(control_pid)); + r = set_put(pid_set, PID_TO_PTR(control_pid)); if (r < 0) goto fail; } From a1f686daf55636f9446a4c017f2488a35595395e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 13:29:53 +0200 Subject: [PATCH 03/20] util: add new uid_is_valid() call This simply factors out the uid validation checks from parse_uid() and uses them everywhere. This simply verifies that the passed UID is neither 64bit -1 nor 32bit -1. --- src/basic/util.c | 20 ++++++++++++++------ src/basic/util.h | 5 ++++- src/libsystemd/sd-login/sd-login.c | 5 +++++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/basic/util.c b/src/basic/util.c index f01f5f237b..beec7cb7dd 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -373,6 +373,19 @@ int parse_pid(const char *s, pid_t* ret_pid) { return 0; } +bool uid_is_valid(uid_t uid) { + + /* Some libc APIs use UID_INVALID as special placeholder */ + if (uid == (uid_t) 0xFFFFFFFF) + return false; + + /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */ + if (uid == (uid_t) 0xFFFF) + return false; + + return true; +} + int parse_uid(const char *s, uid_t* ret_uid) { unsigned long ul = 0; uid_t uid; @@ -389,12 +402,7 @@ int parse_uid(const char *s, uid_t* ret_uid) { if ((unsigned long) uid != ul) return -ERANGE; - /* Some libc APIs use UID_INVALID as special placeholder */ - if (uid == (uid_t) 0xFFFFFFFF) - return -ENXIO; - - /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */ - if (uid == (uid_t) 0xFFFF) + if (!uid_is_valid(uid)) return -ENXIO; if (ret_uid) diff --git a/src/basic/util.h b/src/basic/util.h index ff7a00e928..f8e32360f0 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -154,7 +154,10 @@ int parse_size(const char *t, off_t base, off_t *size); int parse_boolean(const char *v) _pure_; int parse_pid(const char *s, pid_t* ret_pid); int parse_uid(const char *s, uid_t* ret_uid); -#define parse_gid(s, ret_uid) parse_uid(s, ret_uid) +#define parse_gid(s, ret_gid) parse_uid(s, ret_gid) + +bool uid_is_valid(uid_t uid); +#define gid_is_valid(gid) uid_is_valid(gid) int safe_atou(const char *s, unsigned *ret_u); int safe_atoi(const char *s, int *ret_i); diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c index 7d6a4b78cf..6300162ebe 100644 --- a/src/libsystemd/sd-login/sd-login.c +++ b/src/libsystemd/sd-login/sd-login.c @@ -203,6 +203,7 @@ _public_ int sd_uid_get_state(uid_t uid, char**state) { char *s = NULL; int r; + assert_return(uid_is_valid(uid), -EINVAL); assert_return(state, -EINVAL); r = file_of_uid(uid, &p); @@ -230,6 +231,7 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { _cleanup_free_ char *p = NULL, *s = NULL; int r; + assert_return(uid_is_valid(uid), -EINVAL); assert_return(session, -EINVAL); r = file_of_uid(uid, &p); @@ -257,6 +259,7 @@ _public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) int r; const char *word, *variable, *state; + assert_return(uid_is_valid(uid), -EINVAL); assert_return(seat, -EINVAL); variable = require_active ? "ACTIVE_UID" : "UIDS"; @@ -289,6 +292,8 @@ static int uid_get_array(uid_t uid, const char *variable, char ***array) { char **a; int r; + assert_return(uid_is_valid(uid), -EINVAL); + r = file_of_uid(uid, &p); if (r < 0) return r; From 569b19d8fedc4525a6c15f1a3ab9c15dcb7c694a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 14:56:26 +0200 Subject: [PATCH 04/20] cgroup: move controller to dirname translation into join_path_legacy() Let's simplify things a bit. --- src/basic/cgroup-util.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 74e1668a17..8299f6ffd5 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -460,20 +460,23 @@ static const char *controller_to_dirname(const char *controller) { return controller; } -static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) { +static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) { + const char *dn; char *t = NULL; assert(fs); - assert(controller_dn); + assert(controller); + + dn = controller_to_dirname(controller); if (isempty(path) && isempty(suffix)) - t = strappend("/sys/fs/cgroup/", controller_dn); + t = strappend("/sys/fs/cgroup/", dn); else if (isempty(path)) - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL); else if (isempty(suffix)) - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL); else - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL); if (!t) return -ENOMEM; @@ -509,8 +512,8 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!controller) { char *t; - /* If no controller is specified, we assume only the - * path below the controller matters */ + /* If no controller is specified, we return the path + * *below* the controllers, without any prefix. */ if (!path && !suffix) return -EINVAL; @@ -537,14 +540,8 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (unified > 0) r = join_path_unified(path, suffix, fs); - else { - const char *dn; - - dn = controller_to_dirname(controller); - - r = join_path_legacy(dn, path, suffix, fs); - } - + else + r = join_path_legacy(controller, path, suffix, fs); if (r < 0) return r; From b3ac818be8d73c97dc4f74ba8bbd2091506df2e6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 14:57:44 +0200 Subject: [PATCH 05/20] core: split up manager_get_unit_by_pid() Let's move the actual cgroup part of it into a new separate function manager_get_unit_by_pid_cgroup(), and then make manager_get_unit_by_pid() just a wrapper that also checks the two pid hashmaps. Then, let's make sure the various calls that want to deliver events to the owners of a PID check both hashmaps and the cgroup and deliver the event to *each* of them. OTOH make sure bus calls like GetUnitByPID() continue to check the PID hashmaps first and the cgroup only as fallback. --- src/core/cgroup.c | 24 +++++++++++++++++------- src/core/cgroup.h | 1 + src/core/manager.c | 4 ++-- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 9f06b28f26..0a5a08aeba 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1378,13 +1378,27 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { } } -Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { +Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) { _cleanup_free_ char *cgroup = NULL; - Unit *u; int r; assert(m); + if (pid <= 0) + return NULL; + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); + if (r < 0) + return NULL; + + return manager_get_unit_by_cgroup(m, cgroup); +} + +Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { + Unit *u; + + assert(m); + if (pid <= 0) return NULL; @@ -1399,11 +1413,7 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { if (u) return u; - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); - if (r < 0) - return NULL; - - return manager_get_unit_by_cgroup(m, cgroup); + return manager_get_unit_by_pid_cgroup(m, pid); } int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 1ce21f43f2..438f5bf50f 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -130,6 +130,7 @@ void manager_shutdown_cgroup(Manager *m, bool delete); unsigned manager_dispatch_cgroup_queue(Manager *m); Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup); +Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid); Unit* manager_get_unit_by_pid(Manager *m, pid_t pid); int unit_search_main_pid(Unit *u, pid_t *ret); diff --git a/src/core/manager.c b/src/core/manager.c index c2d262a5d3..fc10ddb5d9 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1585,7 +1585,7 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ - u1 = manager_get_unit_by_pid(m, ucred->pid); + u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); if (u1) { manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds); found = true; @@ -1663,7 +1663,7 @@ static int manager_dispatch_sigchld(Manager *m) { /* And now figure out the unit this belongs * to, it might be multiple... */ - u1 = manager_get_unit_by_pid(m, si.si_pid); + u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); if (u1) invoke_sigchld_event(m, u1, &si); u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); From ef5c570edfd8afb20e3b04d3711e111a1dea0548 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 18:23:26 +0200 Subject: [PATCH 06/20] util: document why parse_uid() returns ENXIO parse_uid() returns EINVAL for invalid strings, but ENXIO for the (uid_t) -1 user ids in order to distinguish these two cases. Document this. --- src/basic/util.c | 5 ++++- src/test/test-util.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/basic/util.c b/src/basic/util.c index beec7cb7dd..86aacad307 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -403,7 +403,10 @@ int parse_uid(const char *s, uid_t* ret_uid) { return -ERANGE; if (!uid_is_valid(uid)) - return -ENXIO; + return -ENXIO; /* we return ENXIO instead of EINVAL + * here, to make it easy to distuingish + * invalid numeric uids invalid + * strings. */ if (ret_uid) *ret_uid = uid; diff --git a/src/test/test-util.c b/src/test/test-util.c index dff38ab6f6..10d8d210d5 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -282,6 +282,9 @@ static void test_parse_uid(void) { r = parse_uid("65535", &uid); assert_se(r == -ENXIO); + + r = parse_uid("asdsdas", &uid); + assert_se(r == -EINVAL); } static void test_safe_atou16(void) { From d7e46e01aca53017cffb659115665be621264b8f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 18:24:57 +0200 Subject: [PATCH 07/20] audit: audit calls should return ENODATA when process are not in an audit session ENODATA is how we usually indicate such "missing info" cases, so we should do this here, too. --- src/basic/audit.c | 9 ++++++++- src/libsystemd/sd-bus/bus-creds.c | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/basic/audit.c b/src/basic/audit.c index 54148fcf18..1f593aa813 100644 --- a/src/basic/audit.c +++ b/src/basic/audit.c @@ -36,6 +36,11 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) { assert(id); + /* We don't convert ENOENT to ESRCH here, since we can't + * really distuingish between "audit is not available in the + * kernel" and "the process does not exist", both which will + * result in ENOENT. */ + p = procfs_file_alloca(pid, "sessionid"); r = read_one_line_file(p, &s); @@ -47,7 +52,7 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) { return r; if (u == AUDIT_SESSION_INVALID || u <= 0) - return -ENXIO; + return -ENODATA; *id = u; return 0; @@ -68,6 +73,8 @@ int audit_loginuid_from_pid(pid_t pid, uid_t *uid) { return r; r = parse_uid(s, &u); + if (r == -ENXIO) /* the UID was -1 */ + return -ENODATA; if (r < 0) return r; diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c index 1c365b7fcd..c3cc2b7212 100644 --- a/src/libsystemd/sd-bus/bus-creds.c +++ b/src/libsystemd/sd-bus/bus-creds.c @@ -1062,8 +1062,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) { if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) { r = audit_session_from_pid(pid, &c->audit_session_id); - if (r == -ENXIO) { - /* ENXIO means: no audit session id assigned */ + if (r == -ENODATA) { + /* ENODATA means: no audit session id assigned */ c->audit_session_id = AUDIT_SESSION_INVALID; c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID; } else if (r < 0) { @@ -1075,8 +1075,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) { if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) { r = audit_loginuid_from_pid(pid, &c->audit_login_uid); - if (r == -ENXIO) { - /* ENXIO means: no audit login uid assigned */ + if (r == -ENODATA) { + /* ENODATA means: no audit login uid assigned */ c->audit_login_uid = UID_INVALID; c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID; } else if (r < 0) { From b8725df8b338b13405e430d037f0aac7872ce3d5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 18:27:19 +0200 Subject: [PATCH 08/20] cgroup: when comparing agent paths, use path_equal() After all a path is a path is a path and we should use path_equal() to comapre those. --- src/basic/cgroup-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 8299f6ffd5..3ef96b238e 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -899,7 +899,7 @@ int cg_install_release_agent(const char *controller, const char *agent) { r = write_string_file(fs, agent, 0); if (r < 0) return r; - } else if (!streq(sc, agent)) + } else if (!path_equal(sc, agent)) return -EEXIST; fs = mfree(fs); From 9a66c87a2386468fd3adc250cd9003644a7a5e6b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 18:28:21 +0200 Subject: [PATCH 09/20] cgroup: make sure cg_is_empty_recursive() returns 1 for non-existing cgroups Previously, on the legacy hierarchy a non-existing cgroup was considered identical to an empty one, but the unified hierarchy the check for a non-existing one returned ENOENT. --- src/basic/cgroup-util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 3ef96b238e..bf897c9b2d 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -1002,6 +1002,8 @@ int cg_is_empty_recursive(const char *controller, const char *path) { return r; r = read_one_line_file(populated, &t); + if (r == -ENOENT) + return 1; if (r < 0) return r; From 989189eabf420690ca64d4713cea62e79c945d86 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 19:43:15 +0200 Subject: [PATCH 10/20] cgroup: fix potential bad memory access --- src/basic/cgroup-util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index bf897c9b2d..77375f3669 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -518,9 +518,9 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!path && !suffix) return -EINVAL; - if (isempty(suffix)) + if (!suffix) t = strdup(path); - else if (isempty(path)) + else if (!path) t = strdup(suffix); else t = strjoin(path, "/", suffix, NULL); From 1c80e425124457146ab03279e44ba5155d3e1716 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 19:44:02 +0200 Subject: [PATCH 11/20] cgroup-util: make cg_pid_get_path() return -ENODATA when controller can't be found If the controller managed by systemd cannot found in /proc/$PID/cgroup, return ENODATA, the usual error for cases where the data being looked for does not exist, even if the process does. --- src/basic/cgroup-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 77375f3669..812308e243 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -870,7 +870,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { return 0; } - return -ENOENT; + return -ENODATA; } int cg_install_release_agent(const char *controller, const char *agent) { From ba09d9c687986305e5b60f923e85e2ec1e78d979 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 19:46:23 +0200 Subject: [PATCH 12/20] cgroup: fix potential access of uninitialized variable --- src/basic/cgroup-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 812308e243..fe8750d0f3 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -1910,7 +1910,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; - int r, unified; + int r = 0, unified; if (!path_equal(from, to)) { r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true); From 5f4c5fef66581383ee852b301db67f687663004c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 19:50:37 +0200 Subject: [PATCH 13/20] cgroup: always read the supported controllers from the root cgroup of the local container Otherwise we might end up thinking that we support more controllers than actually enabled for the container we are running in. --- src/basic/cgroup-util.c | 12 ++++++++++-- src/test/test-cgroup-util.c | 12 ++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index fe8750d0f3..388bd629ee 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -1981,14 +1981,22 @@ int cg_mask_supported(CGroupMask *ret) { if (unified < 0) return unified; if (unified > 0) { - _cleanup_free_ char *controllers = NULL; + _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL; const char *c; /* In the unified hierarchy we can read the supported * and accessible controllers from a the top-level * cgroup attribute */ - r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers); + r = cg_get_root_path(&root); + if (r < 0) + return r; + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path); + if (r < 0) + return r; + + r = read_one_line_file(path, &controllers); if (r < 0) return r; diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index ecc9d70bf4..ff7e45901c 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -295,6 +295,17 @@ static void test_shift_path(void) { test_shift_path_one("/foobar/waldo", "/fuckfuck", "/foobar/waldo"); } +static void test_mask_supported(void) { + + CGroupMask m; + CGroupController c; + + assert_se(cg_mask_supported(&m) >= 0); + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) + printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c))); +} + int main(void) { test_path_decode_unit(); test_path_get_unit(); @@ -309,6 +320,7 @@ int main(void) { test_controller_is_valid(); test_slice_to_path(); test_shift_path(); + test_mask_supported(); return 0; } From 98e4d8d7635496cbf62c8127ce6a8e8f7604a031 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 20:10:00 +0200 Subject: [PATCH 14/20] nspawn: enable all controllers we can for the "payload" subcgroup we create In the unified hierarchy delegating controller access is safe, hence make sure to enable all controllers for the "payload" subcgroup if we create it, so that the container will have all controllers enabled the nspawn service itself has. --- src/basic/cgroup-util.c | 2 +- src/nspawn/nspawn.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 388bd629ee..a298b29382 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2163,7 +2163,7 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { r = write_string_file(fs, s, 0); if (r < 0) - log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs); + log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs); } } diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a56960506c..1c64c3e771 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -4737,6 +4737,7 @@ static int create_subcgroup(pid_t pid) { _cleanup_free_ char *cgroup = NULL; const char *child; int unified, r; + CGroupMask supported; /* In the unified hierarchy inner nodes may only only contain * subgroups, but not processes. Hence, if we running in the @@ -4756,6 +4757,10 @@ static int create_subcgroup(pid_t pid) { if (unified == 0) return 0; + r = cg_mask_supported(&supported); + if (r < 0) + return log_error_errno(r, "Failed to determine supported controllers: %m"); + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); if (r < 0) return log_error_errno(r, "Failed to get our control group: %m"); @@ -4770,6 +4775,8 @@ static int create_subcgroup(pid_t pid) { if (r < 0) return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + /* Try to enable as many controllers as possible for the new payload. */ + (void) cg_enable_everywhere(supported, supported, cgroup); return 0; } From 348637b28a6253591a5899665d3cde6d8d71d118 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 20:11:58 +0200 Subject: [PATCH 15/20] test: add one more test case for parse_pid() --- src/test/test-util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/test-util.c b/src/test/test-util.c index 10d8d210d5..8ceb71f22a 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -270,6 +270,9 @@ static void test_parse_pid(void) { r = parse_pid("0xFFFFFFFFFFFFFFFFF", &pid); assert_se(r == -ERANGE); assert_se(pid == 65); + + r = parse_pid("junk", &pid); + assert_se(r == -EINVAL); } static void test_parse_uid(void) { From 9da4cb2be260ed123f2676cb85cb350c527b1492 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 3 Sep 2015 20:13:09 +0200 Subject: [PATCH 16/20] sd-event: make sure RT signals are not dropped RT signals operate in a queue, and we should be careful to never merge two queued signals into one. Hence, makes sure we only ever dequeue a single signal at a time and leave the remaining ones queued in the signalfd. In order to implement correct priorities for the signals introduce one signalfd per priority, so that we only process the highest priority signal at a time. --- src/libsystemd/sd-event/sd-event.c | 434 ++++++++++++++++++--------- src/libsystemd/sd-event/test-event.c | 66 +++- 2 files changed, 359 insertions(+), 141 deletions(-) diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index c419be820a..838ee4d454 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -56,9 +56,22 @@ typedef enum EventSourceType { _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1 } EventSourceType; +/* All objects we use in epoll events start with this value, so that + * we know how to dispatch it */ +typedef enum WakeupType { + WAKEUP_NONE, + WAKEUP_EVENT_SOURCE, + WAKEUP_CLOCK_DATA, + WAKEUP_SIGNAL_DATA, + _WAKEUP_TYPE_MAX, + _WAKEUP_TYPE_INVALID = -1, +} WakeupType; + #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM) struct sd_event_source { + WakeupType wakeup; + unsigned n_ref; sd_event *event; @@ -120,6 +133,7 @@ struct sd_event_source { }; struct clock_data { + WakeupType wakeup; int fd; /* For all clocks we maintain two priority queues each, one @@ -136,11 +150,23 @@ struct clock_data { bool needs_rearm:1; }; +struct signal_data { + WakeupType wakeup; + + /* For each priority we maintain one signal fd, so that we + * only have to dequeue a single event per priority at a + * time. */ + + int fd; + int64_t priority; + sigset_t sigset; + sd_event_source *current; +}; + struct sd_event { unsigned n_ref; int epoll_fd; - int signal_fd; int watchdog_fd; Prioq *pending; @@ -157,8 +183,8 @@ struct sd_event { usec_t perturb; - sigset_t sigset; - sd_event_source **signal_sources; + sd_event_source **signal_sources; /* indexed by signal number */ + Hashmap *signal_data; /* indexed by priority */ Hashmap *child_sources; unsigned n_enabled_child_sources; @@ -355,6 +381,7 @@ static int exit_prioq_compare(const void *a, const void *b) { static void free_clock_data(struct clock_data *d) { assert(d); + assert(d->wakeup == WAKEUP_CLOCK_DATA); safe_close(d->fd); prioq_free(d->earliest); @@ -378,7 +405,6 @@ static void event_free(sd_event *e) { *(e->default_event_ptr) = NULL; safe_close(e->epoll_fd); - safe_close(e->signal_fd); safe_close(e->watchdog_fd); free_clock_data(&e->realtime); @@ -392,6 +418,7 @@ static void event_free(sd_event *e) { prioq_free(e->exit); free(e->signal_sources); + hashmap_free(e->signal_data); hashmap_free(e->child_sources); set_free(e->post_sources); @@ -409,13 +436,12 @@ _public_ int sd_event_new(sd_event** ret) { return -ENOMEM; e->n_ref = 1; - e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; + e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY; + e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA; e->original_pid = getpid(); e->perturb = USEC_INFINITY; - assert_se(sigemptyset(&e->sigset) == 0); - e->pending = prioq_new(pending_prioq_compare); if (!e->pending) { r = -ENOMEM; @@ -509,7 +535,6 @@ static int source_io_register( r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev); else r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev); - if (r < 0) return -errno; @@ -591,45 +616,171 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) { } } -static bool need_signal(sd_event *e, int signal) { - return (e->signal_sources && e->signal_sources[signal] && - e->signal_sources[signal]->enabled != SD_EVENT_OFF) - || - (signal == SIGCHLD && - e->n_enabled_child_sources > 0); -} +static int event_make_signal_data( + sd_event *e, + int sig, + struct signal_data **ret) { -static int event_update_signal_fd(sd_event *e) { struct epoll_event ev = {}; - bool add_to_epoll; + struct signal_data *d; + bool added = false; + sigset_t ss_copy; + int64_t priority; int r; assert(e); if (event_pid_changed(e)) - return 0; + return -ECHILD; - add_to_epoll = e->signal_fd < 0; + if (e->signal_sources && e->signal_sources[sig]) + priority = e->signal_sources[sig]->priority; + else + priority = 0; - r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC); - if (r < 0) - return -errno; + d = hashmap_get(e->signal_data, &priority); + if (d) { + if (sigismember(&d->sigset, sig) > 0) { + if (ret) + *ret = d; + return 0; + } + } else { + r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops); + if (r < 0) + return r; - e->signal_fd = r; + d = new0(struct signal_data, 1); + if (!d) + return -ENOMEM; - if (!add_to_epoll) - return 0; + d->wakeup = WAKEUP_SIGNAL_DATA; + d->fd = -1; + d->priority = priority; - ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL); + r = hashmap_put(e->signal_data, &d->priority, d); + if (r < 0) + return r; - r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev); - if (r < 0) { - e->signal_fd = safe_close(e->signal_fd); - return -errno; + added = true; } + ss_copy = d->sigset; + assert_se(sigaddset(&ss_copy, sig) >= 0); + + r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC); + if (r < 0) { + r = -errno; + goto fail; + } + + d->sigset = ss_copy; + + if (d->fd >= 0) { + if (ret) + *ret = d; + return 0; + } + + d->fd = r; + + ev.events = EPOLLIN; + ev.data.ptr = d; + + r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev); + if (r < 0) { + r = -errno; + goto fail; + } + + if (ret) + *ret = d; + return 0; + +fail: + if (added) { + d->fd = safe_close(d->fd); + hashmap_remove(e->signal_data, &d->priority); + free(d); + } + + return r; +} + +static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) { + assert(e); + assert(d); + + /* Turns off the specified signal in the signal data + * object. If the signal mask of the object becomes empty that + * way removes it. */ + + if (sigismember(&d->sigset, sig) == 0) + return; + + assert_se(sigdelset(&d->sigset, sig) >= 0); + + if (sigisemptyset(&d->sigset)) { + + /* If all the mask is all-zero we can get rid of the structure */ + hashmap_remove(e->signal_data, &d->priority); + assert(!d->current); + safe_close(d->fd); + free(d); + return; + } + + assert(d->fd >= 0); + + if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0) + log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m"); +} + +static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) { + struct signal_data *d; + static const int64_t zero_priority = 0; + + assert(e); + + /* Rechecks if the specified signal is still something we are + * interested in. If not, we'll unmask it, and possibly drop + * the signalfd for it. */ + + if (sig == SIGCHLD && + e->n_enabled_child_sources > 0) + return; + + if (e->signal_sources && + e->signal_sources[sig] && + e->signal_sources[sig]->enabled != SD_EVENT_OFF) + return; + + /* + * The specified signal might be enabled in three different queues: + * + * 1) the one that belongs to the priority passed (if it is non-NULL) + * 2) the one that belongs to the priority of the event source of the signal (if there is one) + * 3) the 0 priority (to cover the SIGCHLD case) + * + * Hence, let's remove it from all three here. + */ + + if (priority) { + d = hashmap_get(e->signal_data, priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + if (e->signal_sources && e->signal_sources[sig]) { + d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + d = hashmap_get(e->signal_data, &zero_priority); + if (d) + event_unmask_signal_data(e, d, sig); } static void source_disconnect(sd_event_source *s) { @@ -668,17 +819,11 @@ static void source_disconnect(sd_event_source *s) { case SOURCE_SIGNAL: if (s->signal.sig > 0) { + if (s->event->signal_sources) s->event->signal_sources[s->signal.sig] = NULL; - /* If the signal was on and now it is off... */ - if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } + event_gc_signal_data(s->event, &s->priority, s->signal.sig); } break; @@ -688,18 +833,10 @@ static void source_disconnect(sd_event_source *s) { if (s->enabled != SD_EVENT_OFF) { assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - - /* We know the signal was on, if it is off now... */ - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } } - hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid)); + (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid)); + event_gc_signal_data(s->event, &s->priority, SIGCHLD); } break; @@ -778,6 +915,14 @@ static int source_set_pending(sd_event_source *s, bool b) { d->needs_rearm = true; } + if (s->type == SOURCE_SIGNAL && !b) { + struct signal_data *d; + + d = hashmap_get(s->event->signal_data, &s->priority); + if (d && d->current == s) + d->current = NULL; + } + return 0; } @@ -827,6 +972,7 @@ _public_ int sd_event_add_io( if (!s) return -ENOMEM; + s->wakeup = WAKEUP_EVENT_SOURCE; s->io.fd = fd; s->io.events = events; s->io.callback = callback; @@ -883,7 +1029,7 @@ static int event_setup_timer_fd( return -errno; ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock)); + ev.data.ptr = d; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev); if (r < 0) { @@ -993,9 +1139,9 @@ _public_ int sd_event_add_signal( void *userdata) { sd_event_source *s; + struct signal_data *d; sigset_t ss; int r; - bool previous; assert_return(e, -EINVAL); assert_return(sig > 0, -EINVAL); @@ -1020,8 +1166,6 @@ _public_ int sd_event_add_signal( } else if (e->signal_sources[sig]) return -EBUSY; - previous = need_signal(e, sig); - s = source_new(e, !ret, SOURCE_SIGNAL); if (!s) return -ENOMEM; @@ -1033,14 +1177,10 @@ _public_ int sd_event_add_signal( e->signal_sources[sig] = s; - if (!previous) { - assert_se(sigaddset(&e->sigset, sig) == 0); - - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, sig, &d); + if (r < 0) { + source_free(s); + return r; } /* Use the signal name as description for the event source by default */ @@ -1062,7 +1202,6 @@ _public_ int sd_event_add_child( sd_event_source *s; int r; - bool previous; assert_return(e, -EINVAL); assert_return(pid > 1, -EINVAL); @@ -1079,8 +1218,6 @@ _public_ int sd_event_add_child( if (hashmap_contains(e->child_sources, INT_TO_PTR(pid))) return -EBUSY; - previous = need_signal(e, SIGCHLD); - s = source_new(e, !ret, SOURCE_CHILD); if (!s) return -ENOMEM; @@ -1099,14 +1236,11 @@ _public_ int sd_event_add_child( e->n_enabled_child_sources ++; - if (!previous) { - assert_se(sigaddset(&e->sigset, SIGCHLD) == 0); - - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, SIGCHLD, NULL); + if (r < 0) { + e->n_enabled_child_sources--; + source_free(s); + return r; } e->need_process_child = true; @@ -1406,6 +1540,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) } _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) { + int r; + assert_return(s, -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1413,7 +1549,25 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) if (s->priority == priority) return 0; - s->priority = priority; + if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) { + struct signal_data *old, *d; + + /* Move us from the signalfd belonging to the old + * priority to the signalfd of the new priority */ + + assert_se(old = hashmap_get(s->event->signal_data, &s->priority)); + + s->priority = priority; + + r = event_make_signal_data(s->event, s->signal.sig, &d); + if (r < 0) { + s->priority = old->priority; + return r; + } + + event_unmask_signal_data(s->event, old, s->signal.sig); + } else + s->priority = priority; if (s->pending) prioq_reshuffle(s->event->pending, s, &s->pending_index); @@ -1478,34 +1632,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - assert(need_signal(s->event, s->signal.sig)); - s->enabled = m; - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } - + event_gc_signal_data(s->event, &s->priority, s->signal.sig); break; case SOURCE_CHILD: - assert(need_signal(s->event, SIGCHLD)); - s->enabled = m; assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - } - + event_gc_signal_data(s->event, &s->priority, SIGCHLD); break; case SOURCE_EXIT: @@ -1551,37 +1689,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - /* Check status before enabling. */ - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } s->enabled = m; + + r = event_make_signal_data(s->event, s->signal.sig, NULL); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + return r; + } + break; case SOURCE_CHILD: - /* Check status before enabling. */ - if (s->enabled == SD_EVENT_OFF) { - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } + if (s->enabled == SD_EVENT_OFF) s->event->n_enabled_child_sources++; - } s->enabled = m; + + r = event_make_signal_data(s->event, s->signal.sig, SIGCHLD); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + s->event->n_enabled_child_sources--; + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + return r; + } + break; case SOURCE_EXIT: @@ -2025,20 +2159,35 @@ static int process_child(sd_event *e) { return 0; } -static int process_signal(sd_event *e, uint32_t events) { +static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) { bool read_one = false; int r; assert(e); - assert_return(events == EPOLLIN, -EIO); + /* If there's a signal queued on this priority and SIGCHLD is + on this priority too, then make sure to recheck the + children we watch. This is because we only ever dequeue + the first signal per priority, and if we dequeue one, and + SIGCHLD might be enqueued later we wouldn't know, but we + might have higher priority children we care about hence we + need to check that explicitly. */ + + if (sigismember(&d->sigset, SIGCHLD)) + e->need_process_child = true; + + /* If there's already an event source pending for this + * priority we don't read another */ + if (d->current) + return 0; + for (;;) { struct signalfd_siginfo si; ssize_t n; sd_event_source *s = NULL; - n = read(e->signal_fd, &si, sizeof(si)); + n = read(d->fd, &si, sizeof(si)); if (n < 0) { if (errno == EAGAIN || errno == EINTR) return read_one; @@ -2053,24 +2202,21 @@ static int process_signal(sd_event *e, uint32_t events) { read_one = true; - if (si.ssi_signo == SIGCHLD) { - r = process_child(e); - if (r < 0) - return r; - if (r > 0) - continue; - } - if (e->signal_sources) s = e->signal_sources[si.ssi_signo]; - if (!s) continue; + if (s->pending) + continue; s->signal.siginfo = si; + d->current = s; + r = source_set_pending(s, true); if (r < 0) return r; + + return 1; } } @@ -2388,23 +2534,31 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { for (i = 0; i < m; i++) { - if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME)) - r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME)) - r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC)) - r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM)) - r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM)) - r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL)) - r = process_signal(e, ev_queue[i].events); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL); - else - r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + else { + WakeupType *t = ev_queue[i].data.ptr; + switch (*t) { + + case WAKEUP_EVENT_SOURCE: + r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + + case WAKEUP_CLOCK_DATA: { + struct clock_data *d = ev_queue[i].data.ptr; + r = flush_timer(e, d->fd, ev_queue[i].events, &d->next); + break; + } + + case WAKEUP_SIGNAL_DATA: + r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + + default: + assert_not_reached("Invalid wake-up pointer"); + } + } if (r < 0) goto finish; } diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index 408e1679a2..c092e56b7a 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -156,7 +156,7 @@ static int exit_handler(sd_event_source *s, void *userdata) { return 3; } -int main(int argc, char *argv[]) { +static void test_basic(void) { sd_event *e = NULL; sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL; static const char ch = 'x'; @@ -244,6 +244,70 @@ int main(int argc, char *argv[]) { safe_close_pair(b); safe_close_pair(d); safe_close_pair(k); +} + +static int last_rtqueue_sigval = 0; +static int n_rtqueue = 0; + +static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + last_rtqueue_sigval = si->ssi_int; + n_rtqueue ++; + return 0; +} + +static void test_rtqueue(void) { + sd_event_source *u = NULL, *v = NULL, *s = NULL; + sd_event *e = NULL; + + assert_se(sd_event_default(&e) >= 0); + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0); + assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0); + + assert_se(sd_event_source_set_priority(v, -10) >= 0); + + assert(sigqueue(getpid(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0); + assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0); + assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0); + assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0); + assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0); + + assert_se(n_rtqueue == 0); + assert_se(last_rtqueue_sigval == 0); + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 1); + assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 2); + assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 3); + assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */ + + assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */ + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); + + sd_event_source_unref(u); + sd_event_source_unref(v); + sd_event_source_unref(s); + + sd_event_unref(e); +} + +int main(int argc, char *argv[]) { + + test_basic(); + test_rtqueue(); return 0; } From 707b66c66381c899d7ef640e158ffdd5bcff4deb Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 4 Sep 2015 09:05:52 +0200 Subject: [PATCH 17/20] sd-login: rework error handling Makre sure we always return sensible errors for the various, following the same rules, and document them in a comment in sd-login.c. Also, update all relevant man pages accordingly. --- man/sd_get_seats.xml | 23 ++++ man/sd_login_monitor_new.xml | 37 ++++-- man/sd_machine_get_class.xml | 31 ++++- man/sd_pid_get_session.xml | 38 ++++-- man/sd_seat_get_active.xml | 37 ++++++ man/sd_session_is_active.xml | 37 ++++++ man/sd_uid_get_state.xml | 39 ++++++ src/libsystemd/sd-login/sd-login.c | 178 +++++++++++++++------------ src/libsystemd/sd-login/test-login.c | 8 +- 9 files changed, 325 insertions(+), 103 deletions(-) diff --git a/man/sd_get_seats.xml b/man/sd_get_seats.xml index 4390d36ebe..f1981f7ea2 100644 --- a/man/sd_get_seats.xml +++ b/man/sd_get_seats.xml @@ -115,6 +115,29 @@ errno-style error code. + + + Errors + + Returned errors may indicate the following problems: + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/man/sd_login_monitor_new.xml b/man/sd_login_monitor_new.xml index a7b47a3207..a8854dd590 100644 --- a/man/sd_login_monitor_new.xml +++ b/man/sd_login_monitor_new.xml @@ -161,20 +161,20 @@ is no timeout to wait for this will fill in (uint64_t) -1 instead. Note that poll() takes a relative timeout in milliseconds rather than an absolute timeout - in microseconds. To convert the absolute 'us' timeout into + in microseconds. To convert the absolute 'µs' timeout into relative 'ms', use code like the following: uint64_t t; int msec; sd_login_monitor_get_timeout(m, &t); if (t == (uint64_t) -1) - msec = -1; + msec = -1; else { - struct timespec ts; - uint64_t n; - clock_getttime(CLOCK_MONOTONIC, &ts); - n = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000; - msec = t > n ? (int) ((t - n + 999) / 1000) : 0; + struct timespec ts; + uint64_t n; + clock_getttime(CLOCK_MONOTONIC, &ts); + n = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000; + msec = t > n ? (int) ((t - n + 999) / 1000) : 0; } The code above does not do any error checking for brevity's @@ -203,6 +203,29 @@ else { always returns NULL. + + Errors + + Returned errors may indicate the following problems: + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). The specified category to + watch is not known. + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/man/sd_machine_get_class.xml b/man/sd_machine_get_class.xml index 5b881ccea1..9ad7f3fc66 100644 --- a/man/sd_machine_get_class.xml +++ b/man/sd_machine_get_class.xml @@ -56,7 +56,7 @@ int sd_machine_get_class const char* machine - char *class + char **class @@ -98,6 +98,35 @@ code. + + Errors + + Returned errors may indicate the following problems: + + + + + -ENXIO + + The specified machine does not exist or is currently not running. + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/man/sd_pid_get_session.xml b/man/sd_pid_get_session.xml index 9c6706caf8..903c143f36 100644 --- a/man/sd_pid_get_session.xml +++ b/man/sd_pid_get_session.xml @@ -1,4 +1,4 @@ - + @@ -163,7 +163,7 @@ processes, user processes that are shared between multiple sessions of the same user, or kernel threads). For processes not being part of a login session this function will fail with - -ENXIO. The returned string needs to be freed with the libc + -ENODATA. The returned string needs to be freed with the libc free3 call after use. @@ -175,9 +175,9 @@ paths. Note that not all processes are part of a system unit/service (e.g. user processes, or kernel threads). For processes not being part of a systemd system unit this function - will fail with -ENXIO (More specifically: this call will not work - for kernel threads.) The returned string needs to be freed with - the libc free3 call after use. @@ -194,7 +194,7 @@ multiple login sessions of the same user, where sd_pid_get_session() will fail. For processes not being part of a login session and not being a shared process - of a user this function will fail with -ENXIO. + of a user this function will fail with -ENODATA. sd_pid_get_machine_name() may be used to determine the name of the VM or container is a member of. The @@ -203,7 +203,7 @@ free3 call after use. For processes not part of a VM or containers this - function fails with -ENXIO. + function fails with -ENODATA. sd_pid_get_slice() may be used to determine the slice unit the process is a member of. See @@ -251,7 +251,22 @@ - -ENXIO + -ESRCH + + The specified PID does not refer to a running + process. + + + + + -BADF + + The specified socket file descriptor was + invalid. + + + + -ENODATA Given field is not specified for the described process or peer. @@ -259,11 +274,10 @@ - -ESRCH + -EINVAL - The specified PID does not refer to a running - process. - + An input parameter was invalid (out of range, + or NULL, where that's not accepted). diff --git a/man/sd_seat_get_active.xml b/man/sd_seat_get_active.xml index 3c57ec9ea4..4d3e0822e0 100644 --- a/man/sd_seat_get_active.xml +++ b/man/sd_seat_get_active.xml @@ -148,6 +148,43 @@ errno-style error code. + + Errors + + Returned errors may indicate the following problems: + + + + + -ENODATA + + Given field is not specified for the described + seat. + + + + + -ENXIO + + The specified seat is unknown. + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/man/sd_session_is_active.xml b/man/sd_session_is_active.xml index 4ca3a6c150..7de9523789 100644 --- a/man/sd_session_is_active.xml +++ b/man/sd_session_is_active.xml @@ -289,6 +289,43 @@ negative errno-style error code. + + Errors + + Returned errors may indicate the following problems: + + + + + -ENXIO + + The specified session does not exist. + + + + + -ENODATA + + Given field is not specified for the described + session. + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/man/sd_uid_get_state.xml b/man/sd_uid_get_state.xml index b158f3528c..13ddf08c65 100644 --- a/man/sd_uid_get_state.xml +++ b/man/sd_uid_get_state.xml @@ -169,6 +169,45 @@ errno-style error code. + + Errors + + Returned errors may indicate the following problems: + + + + + -ENODATA + + Given field is not specified for the described + user. + + + + + -ENXIO + + The specified seat is unknown. + + + + + -EINVAL + + An input parameter was invalid (out of range, + or NULL, where that's not accepted). This is also returned if + the passed user ID is 0xFFFF or 0xFFFFFFFF, which are + undefined on Linux. + + + + -ENOMEM + + Memory allocation failed. + + + + Notes diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c index 6300162ebe..180bdd09ad 100644 --- a/src/libsystemd/sd-login/sd-login.c +++ b/src/libsystemd/sd-login/sd-login.c @@ -35,6 +35,16 @@ #include "hostname-util.h" #include "sd-login.h" +/* Error codes: + * + * invalid input parameters → -EINVAL + * invalid fd → -EBADF + * process does not exist → -ESRCH + * cgroup does not exist → -ENOENT + * machine, session does not exist → -ENXIO + * requested metadata on object is missing → -ENODATA + */ + _public_ int sd_pid_get_session(pid_t pid, char **session) { assert_return(pid >= 0, -EINVAL); @@ -190,6 +200,8 @@ _public_ int sd_peer_get_user_slice(int fd, char **slice) { } static int file_of_uid(uid_t uid, char **p) { + + assert_return(uid_is_valid(uid), -EINVAL); assert(p); if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0) @@ -203,7 +215,6 @@ _public_ int sd_uid_get_state(uid_t uid, char**state) { char *s = NULL; int r; - assert_return(uid_is_valid(uid), -EINVAL); assert_return(state, -EINVAL); r = file_of_uid(uid, &p); @@ -217,11 +228,15 @@ _public_ int sd_uid_get_state(uid_t uid, char**state) { if (!s) return -ENOMEM; - } else if (r < 0) { + } + if (r < 0) { free(s); return r; - } else if (!s) + } + if (isempty(s)) { + free(s); return -EIO; + } *state = s; return 0; @@ -231,7 +246,6 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { _cleanup_free_ char *p = NULL, *s = NULL; int r; - assert_return(uid_is_valid(uid), -EINVAL); assert_return(session, -EINVAL); r = file_of_uid(uid, &p); @@ -240,12 +254,11 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { r = parse_env_file(p, NEWLINE, "DISPLAY", &s, NULL); if (r == -ENOENT) - return -ENXIO; + return -ENODATA; if (r < 0) return r; - if (isempty(s)) - return -ENXIO; + return -ENODATA; *session = s; s = NULL; @@ -253,6 +266,35 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { return 0; } +static int file_of_seat(const char *seat, char **_p) { + char *p; + int r; + + assert(_p); + + if (seat) { + if (!filename_is_valid(seat)) + return -EINVAL; + + p = strappend("/run/systemd/seats/", seat); + } else { + _cleanup_free_ char *buf = NULL; + + r = sd_session_get_seat(NULL, &buf); + if (r < 0) + return r; + + p = strappend("/run/systemd/seats/", buf); + } + + if (!p) + return -ENOMEM; + + *_p = p; + p = NULL; + return 0; +} + _public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) { _cleanup_free_ char *t = NULL, *s = NULL, *p = NULL; size_t l; @@ -260,29 +302,27 @@ _public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) const char *word, *variable, *state; assert_return(uid_is_valid(uid), -EINVAL); - assert_return(seat, -EINVAL); - - variable = require_active ? "ACTIVE_UID" : "UIDS"; - - p = strappend("/run/systemd/seats/", seat); - if (!p) - return -ENOMEM; - - r = parse_env_file(p, NEWLINE, variable, &s, NULL); + r = file_of_seat(seat, &p); if (r < 0) return r; - if (!s) - return -EIO; + variable = require_active ? "ACTIVE_UID" : "UIDS"; + + r = parse_env_file(p, NEWLINE, variable, &s, NULL); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + if (isempty(s)) + return 0; if (asprintf(&t, UID_FMT, uid) < 0) return -ENOMEM; - FOREACH_WORD(word, l, s, state) { + FOREACH_WORD(word, l, s, state) if (strneq(t, word, l)) return 1; - } return 0; } @@ -292,33 +332,22 @@ static int uid_get_array(uid_t uid, const char *variable, char ***array) { char **a; int r; - assert_return(uid_is_valid(uid), -EINVAL); + assert(variable); r = file_of_uid(uid, &p); if (r < 0) return r; - r = parse_env_file(p, NEWLINE, - variable, &s, - NULL); - if (r < 0) { - if (r == -ENOENT) { - if (array) - *array = NULL; - return 0; - } - - return r; - } - - if (!s) { + r = parse_env_file(p, NEWLINE, variable, &s, NULL); + if (r == -ENOENT || (r >= 0 && isempty(s))) { if (array) *array = NULL; return 0; } + if (r < 0) + return r; a = strv_split(s, " "); - if (!a) return -ENOMEM; @@ -380,37 +409,39 @@ static int file_of_session(const char *session, char **_p) { } _public_ int sd_session_is_active(const char *session) { - int r; _cleanup_free_ char *p = NULL, *s = NULL; + int r; r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, "ACTIVE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) + if (isempty(s)) return -EIO; return parse_boolean(s); } _public_ int sd_session_is_remote(const char *session) { - int r; _cleanup_free_ char *p = NULL, *s = NULL; + int r; r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, "REMOTE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) - return -EIO; + if (isempty(s)) + return -ENODATA; return parse_boolean(s); } @@ -426,9 +457,11 @@ _public_ int sd_session_get_state(const char *session, char **state) { return r; r = parse_env_file(p, NEWLINE, "STATE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - else if (!s) + if (isempty(s)) return -EIO; *state = s; @@ -448,10 +481,11 @@ _public_ int sd_session_get_uid(const char *session, uid_t *uid) { return r; r = parse_env_file(p, NEWLINE, "UID", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) + if (isempty(s)) return -EIO; return parse_uid(s, uid); @@ -462,17 +496,19 @@ static int session_get_string(const char *session, const char *field, char **val int r; assert_return(value, -EINVAL); + assert(field); r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, field, &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - if (isempty(s)) - return -ENXIO; + return -ENODATA; *value = s; s = NULL; @@ -492,6 +528,8 @@ _public_ int sd_session_get_vt(const char *session, unsigned *vtnr) { unsigned u; int r; + assert_return(vtnr, -EINVAL); + r = session_get_string(session, "VTNR", &vtnr_string); if (r < 0) return r; @@ -547,32 +585,6 @@ _public_ int sd_session_get_remote_host(const char *session, char **remote_host) return session_get_string(session, "REMOTE_HOST", remote_host); } -static int file_of_seat(const char *seat, char **_p) { - char *p; - int r; - - assert(_p); - - if (seat) - p = strappend("/run/systemd/seats/", seat); - else { - _cleanup_free_ char *buf = NULL; - - r = sd_session_get_seat(NULL, &buf); - if (r < 0) - return r; - - p = strappend("/run/systemd/seats/", buf); - } - - if (!p) - return -ENOMEM; - - *_p = p; - p = NULL; - return 0; -} - _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) { _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL; int r; @@ -587,6 +599,8 @@ _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) { "ACTIVE", &s, "ACTIVE_UID", &t, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; @@ -625,7 +639,8 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui "SESSIONS", &s, "ACTIVE_SESSIONS", &t, NULL); - + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; @@ -657,7 +672,6 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui return -ENOMEM; r = parse_uid(k, b + i); - if (r < 0) continue; @@ -688,7 +702,7 @@ static int seat_get_can(const char *seat, const char *variable) { _cleanup_free_ char *p = NULL, *s = NULL; int r; - assert_return(variable, -EINVAL); + assert(variable); r = file_of_seat(seat, &p); if (r < 0) @@ -697,10 +711,12 @@ static int seat_get_can(const char *seat, const char *variable) { r = parse_env_file(p, NEWLINE, variable, &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - if (!s) - return 0; + if (isempty(s)) + return -ENODATA; return parse_boolean(s); } @@ -824,6 +840,8 @@ _public_ int sd_machine_get_class(const char *machine, char **class) { p = strjoina("/run/systemd/machines/", machine); r = parse_env_file(p, NEWLINE, "CLASS", &c, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; if (!c) @@ -847,6 +865,8 @@ _public_ int sd_machine_get_ifindices(const char *machine, int **ifindices) { p = strjoina("/run/systemd/machines/", machine); r = parse_env_file(p, NEWLINE, "NETIF", &netif, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; if (!netif) { diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c index ddea7ffa14..70b0345848 100644 --- a/src/libsystemd/sd-login/test-login.c +++ b/src/libsystemd/sd-login/test-login.c @@ -52,7 +52,7 @@ static void test_login(void) { display_session = NULL; r = sd_uid_get_display(u2, &display_session); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("user's display session = %s\n", strna(display_session)); free(display_session); @@ -108,19 +108,19 @@ static void test_login(void) { display = NULL; r = sd_session_get_display(session, &display); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("display = %s\n", strna(display)); free(display); remote_user = NULL; r = sd_session_get_remote_user(session, &remote_user); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("remote_user = %s\n", strna(remote_user)); free(remote_user); remote_host = NULL; r = sd_session_get_remote_host(session, &remote_host); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("remote_host = %s\n", strna(remote_host)); free(remote_host); From 19af675e99d32172b58f1c91c8281ba2efd5d863 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 4 Sep 2015 09:23:07 +0200 Subject: [PATCH 18/20] cgroups: delegation to unprivileged services is safe in the unified hierarchy Delegation to unpriviliged processes is safe in the unified hierarchy, hence allow it. This has the benefit of permitting "systemd --user" instances to further partition their resources between user services. --- src/core/cgroup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 0a5a08aeba..9a025cf929 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -507,15 +507,20 @@ CGroupMask unit_get_own_mask(Unit *u) { return 0; /* If delegation is turned on, then turn on all cgroups, - * unless the process we fork into it is known to drop - * privileges anyway, and shouldn't get access to the - * controllers anyway. */ + * unless we are on the legacy hierarchy and the process we + * fork into it is known to drop privileges, and hence + * shouldn't get access to the controllers. + * + * Note that on the unified hierarchy it is safe to delegate + * controllers to unprivileged services. */ if (c->delegate) { ExecContext *e; e = unit_get_exec_context(u); - if (!e || exec_context_maintains_privileges(e)) + if (!e || + exec_context_maintains_privileges(e) || + cg_unified() > 0) return _CGROUP_MASK_ALL; } From f5aaf575626022313b07d505699fa1af25d248ac Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 4 Sep 2015 09:54:14 +0200 Subject: [PATCH 19/20] sd-login: add new sd_pid_get_cgroup() API This adds a new sd_pid_get_cgroup() call to sd-login which may be used to query the control path of a process. This is useful for programs when making use of delegation units, in order to figure out which subtree has been delegated. In light of the unified control group hierarchy this is finally safe to do, hence let's add a proper API for it, to make it easier to use this. --- Makefile-man.am | 10 +++++++ man/sd_pid_get_session.xml | 40 +++++++++++++++++++++++----- src/libsystemd/libsystemd.sym | 6 +++++ src/libsystemd/sd-login/sd-login.c | 40 ++++++++++++++++++++++++++++ src/libsystemd/sd-login/test-login.c | 6 ++++- src/systemd/sd-login.h | 8 ++++++ 6 files changed, 102 insertions(+), 8 deletions(-) diff --git a/Makefile-man.am b/Makefile-man.am index 65287371b9..ab68c81b1d 100644 --- a/Makefile-man.am +++ b/Makefile-man.am @@ -1937,6 +1937,7 @@ MANPAGES_ALIAS += \ man/sd_login_monitor_get_fd.3 \ man/sd_login_monitor_get_timeout.3 \ man/sd_login_monitor_unref.3 \ + man/sd_peer_get_cgroup.3 \ man/sd_peer_get_machine_name.3 \ man/sd_peer_get_owner_uid.3 \ man/sd_peer_get_session.3 \ @@ -1944,6 +1945,7 @@ MANPAGES_ALIAS += \ man/sd_peer_get_unit.3 \ man/sd_peer_get_user_slice.3 \ man/sd_peer_get_user_unit.3 \ + man/sd_pid_get_cgroup.3 \ man/sd_pid_get_machine_name.3 \ man/sd_pid_get_owner_uid.3 \ man/sd_pid_get_slice.3 \ @@ -1981,6 +1983,7 @@ man/sd_login_monitor_get_events.3: man/sd_login_monitor_new.3 man/sd_login_monitor_get_fd.3: man/sd_login_monitor_new.3 man/sd_login_monitor_get_timeout.3: man/sd_login_monitor_new.3 man/sd_login_monitor_unref.3: man/sd_login_monitor_new.3 +man/sd_peer_get_cgroup.3: man/sd_pid_get_session.3 man/sd_peer_get_machine_name.3: man/sd_pid_get_session.3 man/sd_peer_get_owner_uid.3: man/sd_pid_get_session.3 man/sd_peer_get_session.3: man/sd_pid_get_session.3 @@ -1988,6 +1991,7 @@ man/sd_peer_get_slice.3: man/sd_pid_get_session.3 man/sd_peer_get_unit.3: man/sd_pid_get_session.3 man/sd_peer_get_user_slice.3: man/sd_pid_get_session.3 man/sd_peer_get_user_unit.3: man/sd_pid_get_session.3 +man/sd_pid_get_cgroup.3: man/sd_pid_get_session.3 man/sd_pid_get_machine_name.3: man/sd_pid_get_session.3 man/sd_pid_get_owner_uid.3: man/sd_pid_get_session.3 man/sd_pid_get_slice.3: man/sd_pid_get_session.3 @@ -2043,6 +2047,9 @@ man/sd_login_monitor_get_timeout.html: man/sd_login_monitor_new.html man/sd_login_monitor_unref.html: man/sd_login_monitor_new.html $(html-alias) +man/sd_peer_get_cgroup.html: man/sd_pid_get_session.html + $(html-alias) + man/sd_peer_get_machine_name.html: man/sd_pid_get_session.html $(html-alias) @@ -2064,6 +2071,9 @@ man/sd_peer_get_user_slice.html: man/sd_pid_get_session.html man/sd_peer_get_user_unit.html: man/sd_pid_get_session.html $(html-alias) +man/sd_pid_get_cgroup.html: man/sd_pid_get_session.html + $(html-alias) + man/sd_pid_get_machine_name.html: man/sd_pid_get_session.html $(html-alias) diff --git a/man/sd_pid_get_session.xml b/man/sd_pid_get_session.xml index 903c143f36..035effcaa9 100644 --- a/man/sd_pid_get_session.xml +++ b/man/sd_pid_get_session.xml @@ -50,6 +50,7 @@ sd_pid_get_machine_name sd_pid_get_slice sd_pid_get_user_slice + sd_pid_get_cgroup sd_peer_get_session sd_peer_get_unit sd_peer_get_user_unit @@ -57,6 +58,7 @@ sd_peer_get_machine_name sd_peer_get_slice sd_peer_get_user_slice + sd_peer_get_cgroup Determine session, unit, owner of a session, container/VM or slice of a specific PID or socket peer @@ -108,6 +110,12 @@ char **slice + + int sd_pid_get_cgroup + pid_t pid + char **cgroup + + int sd_peer_get_session int fd @@ -149,6 +157,12 @@ int fd char **slice + + + int sd_peer_get_cgroup + int fd + char **cgroup + @@ -217,6 +231,17 @@ returns the user slice (as managed by the user's systemd instance) of a process. + sd_pid_get_cgroup() returns the control + group path of the specified process, relative to the root of the + hierarchy. Returns the path without trailing slash, except for + processes located in the root control group, where "/" is + returned. To find the actual control group path in the file system + the returned path needs to be prefixed with + /sys/fs/cgroup/ (if the unified control group + setup is used), or + /sys/fs/cgroup/HIERARCHY/ + (if the legacy multi-hierarchy control group setup is used). + If the pid parameter of any of these functions is passed as 0, the operation is executed for the calling process. @@ -226,13 +251,14 @@ sd_peer_get_user_unit(), sd_peer_get_owner_uid(), sd_peer_get_machine_name(), - sd_peer_get_slice() and - sd_peer_get_user_slice() calls operate - similar to their PID counterparts, but operate on a connected - AF_UNIX socket and retrieve information about the connected peer - process. Note that these fields are retrieved via - /proc, and hence are not suitable for - authorization purposes, as they are subject to races. + sd_peer_get_slice(), + sd_peer_get_user_slice() and + sd_peer_get_cgroup() calls operate similar to + their PID counterparts, but operate on a connected AF_UNIX socket + and retrieve information about the connected peer process. Note + that these fields are retrieved via /proc, + and hence are not suitable for authorization purposes, as they are + subject to races. diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 7bf1d66dde..d5ad127bcb 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -467,3 +467,9 @@ global: sd_bus_emit_object_removed; sd_bus_flush_close_unref; } LIBSYSTEMD_221; + +LIBSYSTEMD_226 { +global: + sd_pid_get_cgroup; + sd_peer_get_cgroup; +} LIBSYSTEMD_222; diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c index 180bdd09ad..55da26e9d9 100644 --- a/src/libsystemd/sd-login/sd-login.c +++ b/src/libsystemd/sd-login/sd-login.c @@ -101,6 +101,32 @@ _public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) { return cg_pid_get_owner_uid(pid, uid); } +_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) { + char *c; + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(cgroup, -EINVAL); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c); + if (r < 0) + return r; + + /* The internal APIs return the empty string for the root + * cgroup, let's return the "/" in the public APIs instead, as + * that's easier and less ambigious for people to grok. */ + if (isempty(c)) { + free(c); + c = strdup("/"); + if (!c) + return -ENOMEM; + + } + + *cgroup = c; + return 0; +} + _public_ int sd_peer_get_session(int fd, char **session) { struct ucred ucred = {}; int r; @@ -199,6 +225,20 @@ _public_ int sd_peer_get_user_slice(int fd, char **slice) { return cg_pid_get_user_slice(ucred.pid, slice); } +_public_ int sd_peer_get_cgroup(int fd, char **cgroup) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(cgroup, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return sd_pid_get_cgroup(ucred.pid, cgroup); +} + static int file_of_uid(uid_t uid, char **p) { assert_return(uid_is_valid(uid), -EINVAL); diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c index 70b0345848..f734ce9eee 100644 --- a/src/libsystemd/sd-login/test-login.c +++ b/src/libsystemd/sd-login/test-login.c @@ -33,7 +33,7 @@ static void test_login(void) { _cleanup_free_ char *pp = NULL, *qq = NULL; int r, k; uid_t u, u2; - char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session; + char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session, *cgroup; char *session; char *state; char *session2; @@ -50,6 +50,10 @@ static void test_login(void) { assert_se(sd_pid_get_owner_uid(0, &u2) == 0); printf("user = "UID_FMT"\n", u2); + assert_se(sd_pid_get_cgroup(0, &cgroup) == 0); + printf("cgroup = %s\n", cgroup); + free(cgroup); + display_session = NULL; r = sd_uid_get_display(u2, &display_session); assert_se(r >= 0 || r == -ENODATA); diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h index 9260396d5d..39b68c3895 100644 --- a/src/systemd/sd-login.h +++ b/src/systemd/sd-login.h @@ -81,6 +81,10 @@ int sd_pid_get_user_slice(pid_t pid, char **slice); * container. This will return an error for non-machine processes. */ int sd_pid_get_machine_name(pid_t pid, char **machine); +/* Get the control group from a PID, relative to the root of the + * hierarchy. */ +int sd_pid_get_cgroup(pid_t pid, char **cgroup); + /* Similar to sd_pid_get_session(), but retrieves data about peer of * connected AF_UNIX socket */ int sd_peer_get_session(int fd, char **session); @@ -109,6 +113,10 @@ int sd_peer_get_user_slice(int fd, char **slice); * of connected AF_UNIX socket */ int sd_peer_get_machine_name(int fd, char **machine); +/* Similar to sd_pid_get_cgroup(), but retrieves data about the peer + * of a connected AF_UNIX socket. */ +int sd_peer_get_cgroup(pid_t pid, char **cgroup); + /* Get state from UID. Possible states: offline, lingering, online, active, closing */ int sd_uid_get_state(uid_t uid, char **state); From a67c56bff419afc16547f16df1ad1bbe0efe84a6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 4 Sep 2015 09:57:51 +0200 Subject: [PATCH 20/20] sd-login: minor header commenting improvements --- src/systemd/sd-login.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h index 39b68c3895..59c6eedcda 100644 --- a/src/systemd/sd-login.h +++ b/src/systemd/sd-login.h @@ -85,32 +85,32 @@ int sd_pid_get_machine_name(pid_t pid, char **machine); * hierarchy. */ int sd_pid_get_cgroup(pid_t pid, char **cgroup); -/* Similar to sd_pid_get_session(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_session(), but retrieves data about the peer + * of a connected AF_UNIX socket */ int sd_peer_get_session(int fd, char **session); -/* Similar to sd_pid_get_owner_uid(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_owner_uid(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_owner_uid(int fd, uid_t *uid); -/* Similar to sd_pid_get_unit(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_unit(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_unit(int fd, char **unit); -/* Similar to sd_pid_get_user_unit(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_user_unit(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_user_unit(int fd, char **unit); -/* Similar to sd_pid_get_slice(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_slice(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_slice(int fd, char **slice); -/* Similar to sd_pid_get_user_slice(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_user_slice(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_user_slice(int fd, char **slice); -/* Similar to sd_pid_get_machine_name(), but retrieves data about peer - * of connected AF_UNIX socket */ +/* Similar to sd_pid_get_machine_name(), but retrieves data about the + * peer of a a connected AF_UNIX socket */ int sd_peer_get_machine_name(int fd, char **machine); /* Similar to sd_pid_get_cgroup(), but retrieves data about the peer