diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index a6efbc3a7b..2774e48cf3 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -700,24 +700,33 @@ int repeat_unmount(const char *path, int flags) { } const char* mode_to_inaccessible_node(mode_t mode) { - /* This function maps a node type to the correspondent inaccessible node type. - * Character and block inaccessible devices may not be created (because major=0 and minor=0), - * in such case we map character and block devices to the inaccessible node type socket. */ + /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during + * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe + * because we run in a userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes + * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead, + * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts + * from socket nodes to any non-directory file nodes, and that's the thing that matters most. 
*/ + switch(mode & S_IFMT) { case S_IFREG: return "/run/systemd/inaccessible/reg"; + case S_IFDIR: return "/run/systemd/inaccessible/dir"; + case S_IFCHR: if (access("/run/systemd/inaccessible/chr", F_OK) == 0) return "/run/systemd/inaccessible/chr"; return "/run/systemd/inaccessible/sock"; + case S_IFBLK: if (access("/run/systemd/inaccessible/blk", F_OK) == 0) return "/run/systemd/inaccessible/blk"; return "/run/systemd/inaccessible/sock"; + case S_IFIFO: return "/run/systemd/inaccessible/fifo"; + case S_IFSOCK: return "/run/systemd/inaccessible/sock"; } diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 4daed0eff1..ac2412bf53 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -457,14 +457,20 @@ int mount_setup(bool loaded_policy) { (void) mkdir_label("/run/systemd", 0755); (void) mkdir_label("/run/systemd/system", 0755); - /* Set up inaccessible items */ + /* Set up inaccessible (and empty) file nodes of all types */ (void) mkdir_label("/run/systemd/inaccessible", 0000); (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); - (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); - (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); + /* The following two are likely to fail if we lack the privs for it (for example in a userns environment, if + * CAP_SYS_MKNOD is missing, or if a device node policy prohibits major/minor of 0 device nodes to be + * created). But that's entirely fine. Consumers of these files should carry a fallback to use a different node + * then, for example /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for most + * uses. 
*/ + (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); + (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + return 0; }