From 30874dda3a66c0639773dd23079662fc4bf53afd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 27 Jul 2018 18:04:11 +0200 Subject: [PATCH] dev-setup: generalize logic we use to create "inaccessible" device nodes Let's generalize this, so that we can use this in nspawn later on, which is pretty useful as we need to be able to mask files from the inner child of nspawn too, where the host's /run/systemd/inaccessible directory is not visible anymore. Moreover, if nspawn can create these nodes on its own before the payload this means the payload can run with fewer privileges. --- src/core/mount-setup.c | 17 ++--------- src/shared/dev-setup.c | 59 +++++++++++++++++++++++++++++++++++++ src/shared/dev-setup.h | 2 ++ src/test/meson.build | 4 +++ src/test/test-dev-setup.c | 62 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 14 deletions(-) create mode 100644 src/test/test-dev-setup.c diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index e15d94d98a..4c17395774 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -460,20 +460,9 @@ int mount_setup(bool loaded_policy) { (void) mkdir_label("/run/systemd", 0755); (void) mkdir_label("/run/systemd/system", 0755); - /* Set up inaccessible (and empty) file nodes of all types */ - (void) mkdir_label("/run/systemd/inaccessible", 0000); - (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); - (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); - (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); - (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); - - /* The following two are likely to fail if we lack the privs for it (for example in an userns environment, if - * CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 device nodes to be - * created). But that's entirely fine. 
Consumers of these files should carry fallback to use a different node - * then, for example /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for most - * uses. */ - (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); - (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes + * from. */ + (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); return 0; } diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c index d117fbfda0..b545c2a1c0 100644 --- a/src/shared/dev-setup.c +++ b/src/shared/dev-setup.c @@ -9,6 +9,7 @@ #include "label.h" #include "log.h" #include "path-util.h" +#include "umask-util.h" #include "user-util.h" #include "util.h" @@ -54,3 +55,61 @@ int dev_setup(const char *prefix, uid_t uid, gid_t gid) { return 0; } + +int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) { + static const struct { + const char *name; + mode_t mode; + } table[] = { + { "/run/systemd", S_IFDIR | 0755 }, + { "/run/systemd/inaccessible", S_IFDIR | 0000 }, + { "/run/systemd/inaccessible/reg", S_IFREG | 0000 }, + { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 }, + { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 }, + { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 }, + + /* The following two are likely to fail if we lack the privs for it (for example in a userns + * environment, if CAP_MKNOD is missing, or if a device node policy prohibits major/minor of 0 + * device nodes to be created). But that's entirely fine. Consumers of these files should carry + * a fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close + * enough in behaviour and semantics for most uses. 
*/ + { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 }, + { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 }, + }; + + _cleanup_umask_ mode_t u; + size_t i; + int r; + + u = umask(0000); + + /* Set up inaccessible (and empty) file nodes of all types. These are used as mount sources for over-mounting + * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try + * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the + * underlying file, i.e. in the best case we offer the same node type as the underlying node. */ + + for (i = 0; i < ELEMENTSOF(table); i++) { + _cleanup_free_ char *path = NULL; + + path = prefix_root(root, table[i].name); + if (!path) + return log_oom(); + + if (S_ISDIR(table[i].mode)) + r = mkdir(path, table[i].mode & 07777); + else + r = mknod(path, table[i].mode, makedev(0, 0)); + if (r < 0) { + if (errno != EEXIST) + log_debug_errno(errno, "Failed to create '%s', ignoring: %m", path); + continue; + } + + if (uid != UID_INVALID || gid != GID_INVALID) { + if (lchown(path, uid, gid) < 0) + log_debug_errno(errno, "Failed to chown '%s': %m", path); + } + } + + return 0; +} diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h index f105f2f20f..72b90ec4de 100644 --- a/src/shared/dev-setup.h +++ b/src/shared/dev-setup.h @@ -4,3 +4,5 @@ #include <sys/types.h> int dev_setup(const char *prefix, uid_t uid, gid_t gid); + +int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid); diff --git a/src/test/meson.build b/src/test/meson.build index ade905733e..2635456a4f 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -156,6 +156,10 @@ tests += [ [], []], + [['src/test/test-dev-setup.c'], + [], + []], + [['src/test/test-capability.c'], [], [libcap]], diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c new file mode 100644 index 0000000000..523cfe43b1 --- /dev/null +++ b/src/test/test-dev-setup.c @@ -0,0 +1,62 @@ +/* 
SPDX-License-Identifier: LGPL-2.1+ */ + +#include "capability-util.h" +#include "dev-setup.h" +#include "fileio.h" +#include "fs-util.h" +#include "path-util.h" +#include "rm-rf.h" + +int main(int argc, char *argv[]) { + _cleanup_(rm_rf_physical_and_freep) char *p = NULL; + const char *f; + struct stat st; + + if (have_effective_cap(CAP_DAC_OVERRIDE) <= 0) + return EXIT_TEST_SKIP; + + assert_se(mkdtemp_malloc("/tmp/test-dev-setupXXXXXX", &p) >= 0); + + f = prefix_roota(p, "/run"); + assert_se(mkdir(f, 0755) >= 0); + + assert_se(make_inaccessible_nodes(p, 1, 1) >= 0); + + f = prefix_roota(p, "/run/systemd/inaccessible/reg"); + assert_se(stat(f, &st) >= 0); + assert_se(S_ISREG(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + + f = prefix_roota(p, "/run/systemd/inaccessible/dir"); + assert_se(stat(f, &st) >= 0); + assert_se(S_ISDIR(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + + f = prefix_roota(p, "/run/systemd/inaccessible/fifo"); + assert_se(stat(f, &st) >= 0); + assert_se(S_ISFIFO(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + + f = prefix_roota(p, "/run/systemd/inaccessible/sock"); + assert_se(stat(f, &st) >= 0); + assert_se(S_ISSOCK(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + + f = prefix_roota(p, "/run/systemd/inaccessible/chr"); + if (stat(f, &st) < 0) + assert_se(errno == ENOENT); + else { + assert_se(S_ISCHR(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + } + + f = prefix_roota(p, "/run/systemd/inaccessible/blk"); + if (stat(f, &st) < 0) + assert_se(errno == ENOENT); + else { + assert_se(S_ISBLK(st.st_mode)); + assert_se((st.st_mode & 07777) == 0000); + } + + return EXIT_SUCCESS; +}