sd-bus: sync with kdbus upstream (ABI break)

kdbus has seen a larger update than expected lately, most notably with
kdbusfs, a file system to expose the kdbus control files:

 * Each time a file system of this type is mounted, a new kdbus
   domain is created.

 * The layout inside each mount point is the same as before, except
   that domains are not hierarchically nested anymore.

 * Domains are therefore also unnamed now.

 * Unmounting a kdbusfs will automatically also destroy the
   associated domain.

 * Hence, the action of creating a kdbus domain is now as
   privileged as mounting a filesystem.

 * This way, we can get around creating dev nodes for everything,
   which, last but not least, means we are no longer limited by
   20-bit minor numbers.

The kdbus specific bits in nspawn have all been dropped now, as nspawn
can rely on the container OS to set up its own kdbus domain, simply by
mounting a new instance.

A new set of mounts has been added to mount things *after* the kernel
modules have been loaded. For now, only kdbus is in this set, which is
invoked with mount_setup_late().
This commit is contained in:
Daniel Mack 2014-11-13 20:33:03 +01:00
parent c1ec25a063
commit 63cc4c3138
17 changed files with 64 additions and 165 deletions

View file

@ -71,7 +71,7 @@ int kmod_setup(void) {
{ "unix", "/proc/net/unix", true, NULL },
/* IPC is needed before we bring up any other services */
{ "kdbus", "/sys/bus/kdbus", false, cmdline_check_kdbus },
{ "kdbus", "/sys/fs/kdbus", false, cmdline_check_kdbus },
};
struct kmod_ctx *ctx = NULL;
unsigned int i;

View file

@ -1556,6 +1556,7 @@ int main(int argc, char *argv[]) {
#ifdef HAVE_KMOD
kmod_setup();
#endif
mount_setup_late();
hostname_setup();
machine_id_setup(NULL);
loopback_setup();

View file

@ -729,13 +729,6 @@ static int manager_setup_kdbus(Manager *m) {
}
log_debug("Successfully set up kdbus on %s", p);
/* Create the namespace directory here, so that the contents
* of that directory are not visible to non-root users. This is
* necessary to ensure that users cannot get access to busses
* of virtualized users when no UID namespacing is used. */
if (m->running_as == SYSTEMD_SYSTEM)
mkdir_p_label("/dev/kdbus/domain", 0700);
#endif
return 0;

View file

@ -110,6 +110,13 @@ static const MountPoint mount_table[] = {
#endif
};
/* Mount points that mount_setup_late() walks over. The table is only
 * populated when ENABLE_KDBUS is defined; its single entry then
 * describes a "kdbusfs" file system at /sys/fs/kdbus using the
 * MS_NOSUID|MS_NOEXEC|MS_NODEV mount flags.
 * NOTE(review): MNT_IN_CONTAINER presumably means the entry is also
 * mounted when running inside a container -- confirm against the
 * MountPoint definition. */
static const MountPoint mount_table_late[] = {
#ifdef ENABLE_KDBUS
{ "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_IN_CONTAINER },
#endif
};
/* These are API file systems that might be mounted by other software,
* we just list them here so that we know that we should ignore them */
@ -221,6 +228,21 @@ int mount_setup_early(void) {
return r;
}
/* Runs mount_one() on every entry of mount_table_late.
 *
 * All entries are always attempted, even after one of them has failed.
 * Returns 0 if every mount_one() call returned 0, otherwise the first
 * non-zero value any of the calls returned. */
int mount_setup_late(void) {
        int first_result = 0;
        unsigned idx;

        for (idx = 0; idx < ELEMENTSOF(mount_table_late); idx++) {
                int current;

                current = mount_one(&mount_table_late[idx], false);

                /* Once a non-zero result has been recorded, keep it,
                 * but carry on with the remaining entries. */
                if (first_result != 0)
                        continue;

                first_result = current;
        }

        return first_result;
}
int mount_cgroup_controllers(char ***join_controllers) {
_cleanup_set_free_free_ Set *controllers = NULL;
_cleanup_fclose_ FILE *f;

View file

@ -24,6 +24,7 @@
#include <stdbool.h>
int mount_setup_early(void);
int mount_setup_late(void);
int mount_setup(bool loaded_policy);

View file

@ -145,7 +145,7 @@ static int mount_dev(BindMount *m) {
"/dev/tty\0";
char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
_cleanup_umask_ mode_t u;
int r;
@ -185,10 +185,6 @@ static int mount_dev(BindMount *m) {
mkdir(devmqueue, 0755);
mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
devkdbus = strappenda(temporary_mount, "/dev/kdbus");
mkdir(devkdbus, 0755);
mount("/dev/kdbus", devkdbus, NULL, MS_BIND, NULL);
devhugepages = strappenda(temporary_mount, "/dev/hugepages");
mkdir(devhugepages, 0755);
mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
@ -254,9 +250,6 @@ fail:
if (devshm)
umount(devshm);
if (devkdbus)
umount(devkdbus);
if (devhugepages)
umount(devhugepages);

View file

@ -319,7 +319,7 @@ int main(int argc, char *argv[]) {
umask(0022);
if (access("/dev/kdbus/control", F_OK) < 0)
if (access("/sys/fs/kdbus/control", F_OK) < 0)
return 0;
r = cg_pid_get_owner_uid(0, NULL);

View file

@ -14,11 +14,11 @@ GVariant compatible marshaler to your library first.
After you have done that: here's the basic principle how kdbus works:
You connect to a bus by opening its bus node in /dev/kdbus/. All
You connect to a bus by opening its bus node in /sys/fs/kdbus/. All
buses have a device node there, it starts with a numeric UID of the
owner of the bus, followed by a dash and a string identifying the
bus. The system bus is thus called /dev/kdbus/0-system, and for user
buses the device node is /dev/kdbus/1000-user (if 1000 is your user
bus. The system bus is thus called /sys/fs/kdbus/0-system, and for user
buses the device node is /sys/fs/kdbus/1000-user (if 1000 is your user
id).
(Before we proceed, please always keep a copy of libsystemd next
@ -496,12 +496,12 @@ parameter.
Client libraries should use the following connection string when
connecting to the system bus:
kernel:path=/dev/kdbus/0-system/bus;unix:path=/var/run/dbus/system_bus_socket
kernel:path=/sys/fs/kdbus/0-system/bus;unix:path=/var/run/dbus/system_bus_socket
This will ensure that kdbus is preferred over the legacy AF_UNIX
socket, but compatibility is kept. For the user bus use:
kernel:path=/dev/kdbus/$UID-user/bus;unix:path=$XDG_RUNTIME_DIR/bus
kernel:path=/sys/fs/kdbus/$UID-user/bus;unix:path=$XDG_RUNTIME_DIR/bus
With $UID replaced by the caller's numeric user ID, and $XDG_RUNTIME_DIR
following the XDG basedir spec.

View file

@ -1282,7 +1282,7 @@ int bus_kernel_create_bus(const char *name, bool world, char **s) {
assert(name);
assert(s);
fd = open("/dev/kdbus/control", O_RDWR|O_NOCTTY|O_CLOEXEC);
fd = open("/sys/fs/kdbus/control", O_RDWR|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
return -errno;
@ -1323,7 +1323,7 @@ int bus_kernel_create_bus(const char *name, bool world, char **s) {
if (s) {
char *p;
p = strjoin("/dev/kdbus/", n->str, "/bus", NULL);
p = strjoin("/sys/fs/kdbus/", n->str, "/bus", NULL);
if (!p) {
safe_close(fd);
return -ENOMEM;
@ -1403,7 +1403,7 @@ int bus_kernel_open_bus_fd(const char *bus, char **path) {
int fd;
size_t len;
len = strlen("/dev/kdbus/") + DECIMAL_STR_MAX(uid_t) + 1 + strlen(bus) + strlen("/bus") + 1;
len = strlen("/sys/fs/kdbus/") + DECIMAL_STR_MAX(uid_t) + 1 + strlen(bus) + strlen("/bus") + 1;
if (path) {
p = malloc(len);
@ -1412,7 +1412,7 @@ int bus_kernel_open_bus_fd(const char *bus, char **path) {
*path = p;
} else
p = alloca(len);
sprintf(p, "/dev/kdbus/" UID_FMT "-%s/bus", getuid(), bus);
sprintf(p, "/sys/fs/kdbus/" UID_FMT "-%s/bus", getuid(), bus);
fd = open(p, O_RDWR|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
@ -1585,58 +1585,6 @@ int bus_kernel_make_starter(
return fd;
}
/* Asks kdbus to create a new domain called "name".
 *
 * Opens /dev/kdbus/control, fills in a kdbus_cmd_make carrying a single
 * KDBUS_ITEM_MAKE_NAME item and issues KDBUS_CMD_DOMAIN_MAKE on it.
 *
 * On success the open control file descriptor is returned and, if "s"
 * is non-NULL, *s is set to a newly allocated string
 * "/dev/kdbus/domain/<name>". On failure the descriptor is closed and
 * a negative errno-style value is returned. */
int bus_kernel_create_domain(const char *name, char **s) {
        struct kdbus_cmd_make *cmd;
        struct kdbus_item *item;
        size_t name_size;
        int control_fd;

        assert(name);
        assert(s);

        control_fd = open("/dev/kdbus/control", O_RDWR|O_NOCTTY|O_CLOEXEC);
        if (control_fd < 0)
                return -errno;

        /* Size of the name including its terminating NUL byte */
        name_size = strlen(name) + 1;

        /* Zeroed, 8-byte aligned stack buffer: command header plus one
         * item that holds the name string */
        cmd = alloca0_align(ALIGN8(offsetof(struct kdbus_cmd_make, items) +
                                   offsetof(struct kdbus_item, str) +
                                   name_size),
                            8);

        item = cmd->items;
        item->type = KDBUS_ITEM_MAKE_NAME;
        item->size = offsetof(struct kdbus_item, str) + name_size;
        strcpy(item->str, name);

        cmd->flags = KDBUS_MAKE_ACCESS_WORLD;
        cmd->size = ALIGN8(offsetof(struct kdbus_cmd_make, items) + item->size);

        if (ioctl(control_fd, KDBUS_CMD_DOMAIN_MAKE, cmd) < 0) {
                safe_close(control_fd);
                return -errno;
        }

        /* The higher 32bit of the flags field are considered
         * 'incompatible flags'. Refuse them all for now. */
        if (cmd->flags > 0xFFFFFFFFULL) {
                safe_close(control_fd);
                return -ENOTSUP;
        }

        if (s) {
                char *domain_path;

                domain_path = strappend("/dev/kdbus/domain/", name);
                if (!domain_path) {
                        safe_close(control_fd);
                        return -ENOMEM;
                }

                *s = domain_path;
        }

        return control_fd;
}
int bus_kernel_try_close(sd_bus *bus) {
assert(bus);
assert(bus->is_kernel);

View file

@ -71,7 +71,6 @@ int bus_kernel_make_starter(int fd, const char *name, bool activating, bool acce
int bus_kernel_create_bus(const char *name, bool world, char **s);
int bus_kernel_create_endpoint(const char *bus_name, const char *ep_name, char **path);
int bus_kernel_create_domain(const char *name, char **s);
int bus_kernel_set_endpoint_policy(int fd, uid_t uid, BusEndpoint *ep);

View file

@ -635,8 +635,8 @@ enum kdbus_make_flags {
* @kernel_flags: Supported flags for the used command, kernel userspace
* @items: Items describing details
*
* This structure is used with the KDBUS_CMD_BUS_MAKE, KDBUS_CMD_ENDPOINT_MAKE
* and KDBUS_CMD_DOMAIN_MAKE ioctls.
* This structure is used with the KDBUS_CMD_BUS_MAKE and
* KDBUS_CMD_ENDPOINT_MAKE ioctls.
*/
struct kdbus_cmd_make {
__u64 size;
@ -839,8 +839,6 @@ struct kdbus_cmd_match {
* name. The bus is immediately shut down and
* cleaned up when the opened "control" device node
* is closed.
* KDBUS_CMD_DOMAIN_MAKE: Similar to KDBUS_CMD_BUS_MAKE, but it creates a
* new kdbus domain.
* KDBUS_CMD_ENDPOINT_MAKE: Creates a new named special endpoint to talk to
* the bus. Such endpoints usually carry a more
* restrictive policy and grant restricted access
@ -887,44 +885,42 @@ struct kdbus_cmd_match {
*/
#define KDBUS_CMD_BUS_MAKE _IOW(KDBUS_IOCTL_MAGIC, 0x00, \
struct kdbus_cmd_make)
#define KDBUS_CMD_DOMAIN_MAKE _IOW(KDBUS_IOCTL_MAGIC, 0x10, \
struct kdbus_cmd_make)
#define KDBUS_CMD_ENDPOINT_MAKE _IOW(KDBUS_IOCTL_MAGIC, 0x20, \
#define KDBUS_CMD_ENDPOINT_MAKE _IOW(KDBUS_IOCTL_MAGIC, 0x10, \
struct kdbus_cmd_make)
#define KDBUS_CMD_HELLO _IOWR(KDBUS_IOCTL_MAGIC, 0x30, \
#define KDBUS_CMD_HELLO _IOWR(KDBUS_IOCTL_MAGIC, 0x20, \
struct kdbus_cmd_hello)
#define KDBUS_CMD_BYEBYE _IO(KDBUS_IOCTL_MAGIC, 0x31) \
#define KDBUS_CMD_BYEBYE _IO(KDBUS_IOCTL_MAGIC, 0x21) \
#define KDBUS_CMD_MSG_SEND _IOWR(KDBUS_IOCTL_MAGIC, 0x40, \
#define KDBUS_CMD_MSG_SEND _IOWR(KDBUS_IOCTL_MAGIC, 0x30, \
struct kdbus_msg)
#define KDBUS_CMD_MSG_RECV _IOWR(KDBUS_IOCTL_MAGIC, 0x41, \
#define KDBUS_CMD_MSG_RECV _IOWR(KDBUS_IOCTL_MAGIC, 0x31, \
struct kdbus_cmd_recv)
#define KDBUS_CMD_MSG_CANCEL _IOW(KDBUS_IOCTL_MAGIC, 0x42, \
#define KDBUS_CMD_MSG_CANCEL _IOW(KDBUS_IOCTL_MAGIC, 0x32, \
struct kdbus_cmd_cancel)
#define KDBUS_CMD_FREE _IOW(KDBUS_IOCTL_MAGIC, 0x43, \
#define KDBUS_CMD_FREE _IOW(KDBUS_IOCTL_MAGIC, 0x33, \
struct kdbus_cmd_free)
#define KDBUS_CMD_NAME_ACQUIRE _IOWR(KDBUS_IOCTL_MAGIC, 0x50, \
#define KDBUS_CMD_NAME_ACQUIRE _IOWR(KDBUS_IOCTL_MAGIC, 0x40, \
struct kdbus_cmd_name)
#define KDBUS_CMD_NAME_RELEASE _IOW(KDBUS_IOCTL_MAGIC, 0x51, \
#define KDBUS_CMD_NAME_RELEASE _IOW(KDBUS_IOCTL_MAGIC, 0x41, \
struct kdbus_cmd_name)
#define KDBUS_CMD_NAME_LIST _IOWR(KDBUS_IOCTL_MAGIC, 0x52, \
#define KDBUS_CMD_NAME_LIST _IOWR(KDBUS_IOCTL_MAGIC, 0x42, \
struct kdbus_cmd_name_list)
#define KDBUS_CMD_CONN_INFO _IOWR(KDBUS_IOCTL_MAGIC, 0x60, \
#define KDBUS_CMD_CONN_INFO _IOWR(KDBUS_IOCTL_MAGIC, 0x50, \
struct kdbus_cmd_info)
#define KDBUS_CMD_CONN_UPDATE _IOW(KDBUS_IOCTL_MAGIC, 0x61, \
#define KDBUS_CMD_CONN_UPDATE _IOW(KDBUS_IOCTL_MAGIC, 0x51, \
struct kdbus_cmd_update)
#define KDBUS_CMD_BUS_CREATOR_INFO _IOWR(KDBUS_IOCTL_MAGIC, 0x62, \
#define KDBUS_CMD_BUS_CREATOR_INFO _IOWR(KDBUS_IOCTL_MAGIC, 0x52, \
struct kdbus_cmd_info)
#define KDBUS_CMD_ENDPOINT_UPDATE _IOW(KDBUS_IOCTL_MAGIC, 0x71, \
#define KDBUS_CMD_ENDPOINT_UPDATE _IOW(KDBUS_IOCTL_MAGIC, 0x61, \
struct kdbus_cmd_update)
#define KDBUS_CMD_MATCH_ADD _IOW(KDBUS_IOCTL_MAGIC, 0x80, \
#define KDBUS_CMD_MATCH_ADD _IOW(KDBUS_IOCTL_MAGIC, 0x70, \
struct kdbus_cmd_match)
#define KDBUS_CMD_MATCH_REMOVE _IOW(KDBUS_IOCTL_MAGIC, 0x81, \
#define KDBUS_CMD_MATCH_REMOVE _IOW(KDBUS_IOCTL_MAGIC, 0x71, \
struct kdbus_cmd_match)
#endif /* _KDBUS_UAPI_H_ */

View file

@ -807,7 +807,7 @@ static int parse_container_kernel_address(sd_bus *b, const char **p, char **guid
machine = NULL;
free(b->kernel);
b->kernel = strdup("/dev/kdbus/0-system/bus");
b->kernel = strdup("/sys/fs/kdbus/0-system/bus");
if (!b->kernel)
return -ENOMEM;

View file

@ -180,7 +180,7 @@ static int export_legacy_dbus_address(
int r;
/* skip export if kdbus is not active */
if (access("/dev/kdbus", F_OK) < 0)
if (access("/sys/fs/kdbus", F_OK) < 0)
return PAM_SUCCESS;
if (asprintf(&s, KERNEL_USER_BUS_FMT ";" UNIX_USER_BUS_FMT,

View file

@ -1441,26 +1441,6 @@ static int setup_journal(const char *directory) {
return 0;
}
/* Bind mounts the kdbus domain directory "path" onto "<dest>/dev/kdbus".
 *
 * Does nothing and returns 0 when "path" is NULL. Otherwise the target
 * directory is created with mode 0755 and "path" is bind mounted on
 * top of it. Returns 0 on success; on failure an error is logged and
 * -errno is returned. */
static int setup_kdbus(const char *dest, const char *path) {
        const char *target;

        /* No domain path given, nothing to set up */
        if (path == NULL)
                return 0;

        target = strappenda(dest, "/dev/kdbus");

        if (mkdir(target, 0755) < 0) {
                log_error("Failed to create kdbus path: %m");
                return -errno;
        }

        if (mount(path, target, "bind", MS_BIND, NULL) < 0) {
                log_error("Failed to mount kdbus domain path: %m");
                return -errno;
        }

        return 0;
}
static int drop_capabilities(void) {
return capability_bounding_set_drop(~arg_retain, false);
}
@ -1546,7 +1526,7 @@ static int register_machine(pid_t pid, int local_ifindex) {
return r;
}
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 11,
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
/* Allow the container to
* access and create the API
* device nodes, so that
@ -1566,18 +1546,7 @@ static int register_machine(pid_t pid, int local_ifindex) {
* container to ever create
* these device nodes. */
"/dev/pts/ptmx", "rw",
"char-pts", "rw",
/* Allow the container
* access to all kdbus
* devices. Again, the
* container cannot create
* these nodes, only use
* them. We use a pretty
* open match here, so that
* the kernel API can still
* change. */
"char-kdbus", "rw",
"char-kdbus/*", "rw");
"char-pts", "rw");
if (r < 0) {
log_error("Failed to add device whitelist: %s", strerror(-r));
return r;
@ -2991,9 +2960,9 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo
int main(int argc, char *argv[]) {
_cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
_cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
bool root_device_rw = true, home_device_rw = true, srv_device_rw = true;
_cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
_cleanup_close_ int master = -1, image_fd = -1;
_cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 };
_cleanup_fdset_free_ FDSet *fds = NULL;
int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
@ -3140,26 +3109,6 @@ int main(int argc, char *argv[]) {
goto finish;
}
if (access("/dev/kdbus/control", F_OK) >= 0) {
if (arg_share_system) {
kdbus_domain = strdup("/dev/kdbus");
if (!kdbus_domain) {
log_oom();
goto finish;
}
} else {
const char *ns;
ns = strappenda("machine-", arg_machine);
kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
if (r < 0)
log_debug("Failed to create kdbus domain: %s", strerror(-r));
else
log_debug("Successfully created kdbus domain as %s", kdbus_domain);
}
}
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
log_error("Failed to create kmsg socket pair: %m");
goto finish;
@ -3365,9 +3314,6 @@ int main(int argc, char *argv[]) {
if (mount_tmpfs(arg_directory) < 0)
_exit(EXIT_FAILURE);
if (setup_kdbus(arg_directory, kdbus_domain) < 0)
_exit(EXIT_FAILURE);
/* Tell the parent that we are ready, and that
* it can cgroupify us so that we lack access
* to certain devices and resources. */

View file

@ -62,7 +62,7 @@
#endif
#define UNIX_SYSTEM_BUS_PATH "unix:path=/var/run/dbus/system_bus_socket"
#define KERNEL_SYSTEM_BUS_PATH "kernel:path=/dev/kdbus/0-system/bus"
#define KERNEL_SYSTEM_BUS_PATH "kernel:path=/sys/fs/kdbus/0-system/bus"
#ifdef ENABLE_KDBUS
# define DEFAULT_SYSTEM_BUS_PATH KERNEL_SYSTEM_BUS_PATH ";" UNIX_SYSTEM_BUS_PATH
@ -71,7 +71,7 @@
#endif
#define UNIX_USER_BUS_FMT "unix:path=%s/bus"
#define KERNEL_USER_BUS_FMT "kernel:path=/dev/kdbus/"UID_FMT"-user/bus"
#define KERNEL_USER_BUS_FMT "kernel:path=/sys/fs/kdbus/"UID_FMT"-user/bus"
#define PLYMOUTH_SOCKET { \
.un.sun_family = AF_UNIX, \

View file

@ -12,7 +12,7 @@ Description=Legacy D-Bus Protocol Compatibility Daemon
# The first argument will be replaced by the service by information on
# the process requesting the proxy, we need a placeholder to keep the
# space available for this.
ExecStart=@rootlibexecdir@/systemd-bus-proxyd --drop-privileges --address=kernel:path=/dev/kdbus/0-system/bus --configuration=/etc/dbus-1/system.conf --configuration=/etc/dbus-1/system-local.conf --configuration=/etc/dbus-1/system.d/ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ExecStart=@rootlibexecdir@/systemd-bus-proxyd --drop-privileges --address=kernel:path=/sys/fs/kdbus/0-system/bus --configuration=/etc/dbus-1/system.conf --configuration=/etc/dbus-1/system-local.conf --configuration=/etc/dbus-1/system.d/ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
NotifyAccess=main
CapabilityBoundingSet=CAP_IPC_OWNER CAP_SETUID CAP_SETGID CAP_SETPCAP
PrivateTmp=yes

View file

@ -12,5 +12,5 @@ Description=Legacy D-Bus Protocol Compatibility Daemon
# The first argument will be replaced by the service by information on
# the process requesting the proxy, we need a placeholder to keep the
# space available for this.
ExecStart=@rootlibexecdir@/systemd-bus-proxyd --address=kernel:path=/dev/kdbus/%U-user/bus --configuration=/etc/dbus-1/session.conf --configuration=/etc/dbus-1/session-local.conf --configuration=/etc/dbus-1/session.d/ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ExecStart=@rootlibexecdir@/systemd-bus-proxyd --address=kernel:path=/sys/fs/kdbus/%U-user/bus --configuration=/etc/dbus-1/session.conf --configuration=/etc/dbus-1/session-local.conf --configuration=/etc/dbus-1/session.d/ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
NotifyAccess=main