diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index ce87257bc1..86d9ad7e36 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -597,3 +597,190 @@ int inotify_add_watch_fd(int fd, int what, uint32_t mask) { return r; } + +int chase_symlinks(const char *path, const char *_root, char **ret) { + _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL; + _cleanup_close_ int fd = -1; + unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */ + char *todo; + int r; + + assert(path); + + /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following + * symlinks relative to a root directory, instead of the root of the host. + * + * Note that "root" matters only if we encounter an absolute symlink, it's unused otherwise. Most importantly + * this means the path parameter passed in is not prefixed by it. + * + * Algorithmically this operates on two path buffers: "done" are the components of the path we already + * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to + * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning + * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no + * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races + * at a minimum. */ + + r = path_make_absolute_cwd(path, &buffer); + if (r < 0) + return r; + + if (_root) { + r = path_make_absolute_cwd(_root, &root); + if (r < 0) + return r; + } + + fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + todo = buffer; + for (;;) { + _cleanup_free_ char *first = NULL; + _cleanup_close_ int child = -1; + struct stat st; + size_t n, m; + + /* Determine length of first component in the path */ + n = strspn(todo, "/"); /* The slashes */ + m = n + strcspn(todo + n, "/"); /* The entire length of the component */ + + /* Extract the first component. */ + first = strndup(todo, m); + if (!first) + return -ENOMEM; + + todo += m; + + /* Just a single slash? Then we reached the end. */ + if (isempty(first) || path_equal(first, "/")) + break; + + /* Just a dot? Then let's eat this up. */ + if (path_equal(first, "/.")) + continue; + + /* Two dots? Then chop off the last bit of what we already found out. */ + if (path_equal(first, "/..")) { + _cleanup_free_ char *parent = NULL; + int fd_parent = -1; + + if (isempty(done) || path_equal(done, "/")) + return -EINVAL; + + parent = dirname_malloc(done); + if (!parent) + return -ENOMEM; + + /* Don't allow this to leave the root dir */ + if (root && + path_startswith(done, root) && + !path_startswith(parent, root)) + return -EINVAL; + + free(done); + done = parent; + parent = NULL; + + fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd_parent < 0) + return -errno; + + safe_close(fd); + fd = fd_parent; + + continue; + } + + /* Otherwise let's see what this is. */ + child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (child < 0) + return -errno; + + if (fstat(child, &st) < 0) + return -errno; + + if (S_ISLNK(st.st_mode)) { + _cleanup_free_ char *destination = NULL; + + /* This is a symlink, in this case read the destination. But let's make sure we don't follow + * symlinks without bounds. */ + if (--max_follow <= 0) + return -ELOOP; + + r = readlinkat_malloc(fd, first + n, &destination); + if (r < 0) + return r; + if (isempty(destination)) + return -EINVAL; + + if (path_is_absolute(destination)) { + + /* An absolute destination. Start the loop from the beginning, but use the root + * directory as base. */ + + safe_close(fd); + fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + free(buffer); + buffer = destination; + destination = NULL; + + todo = buffer; + free(done); + + /* Note that we do not revalidate the root, we take it as is. */ + if (isempty(root)) + done = NULL; + else { + done = strdup(root); + if (!done) + return -ENOMEM; + } + + } else { + char *joined; + + /* A relative destination. If so, this is what we'll prefix what's left to do with what + * we just read, and start the loop again, but remain in the current directory. */ + + joined = strjoin("/", destination, todo, NULL); + if (!joined) + return -ENOMEM; + + free(buffer); + todo = buffer = joined; + } + + continue; + } + + /* If this is not a symlink, then let's just add the name we read to what we already verified. */ + if (!done) { + done = first; + first = NULL; + } else { + if (!strextend(&done, first, NULL)) + return -ENOMEM; + } + + /* And iterate again, but go one directory further down. */ + safe_close(fd); + fd = child; + child = -1; + } + + if (!done) { + /* Special case, turn the empty string into "/", to indicate the root directory. */ + done = strdup("/"); + if (!done) + return -ENOMEM; + } + + *ret = done; + done = NULL; + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 2c3b9a1c74..31df47cf1e 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -77,3 +77,5 @@ union inotify_event_buffer { }; int inotify_add_watch_fd(int fd, int what, uint32_t mask); + +int chase_symlinks(const char *path, const char *_root, char **ret); diff --git a/src/core/namespace.c b/src/core/namespace.c index 356d3c8121..d3ab2e8e3e 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -57,9 +58,9 @@ typedef enum MountMode { } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; bool ignore; } BindMount; @@ -71,7 +72,6 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) { STRV_FOREACH(i, strv) { (*p)->ignore = false; - (*p)->done = false; if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { (*p)->ignore = true; @@ -360,11 +360,8 @@ static int apply_mount( * inaccessible path. */ (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; + if (lstat(m->path, &target) < 0) return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); - } what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -378,11 +375,8 @@ static int apply_mount( case READWRITE: r = path_is_mount_point(m->path, 0); - if (r < 0) { - if (m->ignore && errno == ENOENT) - return 0; + if (r < 0) return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); - } if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ return 0; @@ -407,12 +401,8 @@ static int apply_mount( assert(what); - if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } log_debug("Successfully mounted %s to %s", what, m->path); return 0; @@ -435,12 +425,43 @@ static int make_read_only(BindMount *m, char **blacklist) { * already stays this way. This improves compatibility with container managers, where we won't attempt to undo * read-only mounts already applied. */ - if (m->ignore && r == -ENOENT) - return 0; - return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + int setup_namespace( const char* root_directory, char** read_write_paths, @@ -456,6 +477,7 @@ int setup_namespace( unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; @@ -475,6 +497,9 @@ int setup_namespace( ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) + (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0))); + if (root_directory || n > 0) + make_slave = true; + if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); r = append_mounts(&m, read_write_paths, READWRITE); @@ -596,6 +621,13 @@ int setup_namespace( assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); drop_duplicates(mounts, &n); @@ -603,20 +635,26 @@ int setup_namespace( drop_nop(mounts, &n); } - if (unshare(CLONE_NEWNS) < 0) - return -errno; + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; + } - if (n > 0 || root_directory) { + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (n > 0) { @@ -627,7 +665,7 @@ int setup_namespace( for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } /* Create a blacklist we can pass to bind_mount_recursive() */ @@ -640,35 +678,31 @@ int setup_namespace( for (m = mounts; m < mounts + n; ++m) { r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - if (r < 0) /* at this point, we cannot rollback */ - return r; + if (r < 0) + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - return -errno; /* at this point, we cannot rollback */ - - return 0; - -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) { - if (!m->done) - continue; - - (void) umount2(m->path, MNT_DETACH); - } + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; } + r = 0; + +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); + return r; } diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index b35a2ea2c8..53a3cdc663 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -20,16 +20,109 @@ #include #include "alloc-util.h" -#include "fileio.h" #include "fd-util.h" +#include "fileio.h" #include "fs-util.h" #include "macro.h" #include "mkdir.h" +#include "path-util.h" #include "rm-rf.h" #include "string-util.h" #include "strv.h" #include "util.h" +static void test_chase_symlinks(void) { + _cleanup_free_ char *result = NULL; + char temp[] = "/tmp/test-chase.XXXXXX"; + const char *top, *p, *q; + int r; + + assert_se(mkdtemp(temp)); + + top = strjoina(temp, "/top"); + assert_se(mkdir(top, 0700) >= 0); + + p = strjoina(top, "/dot"); + assert_se(symlink(".", p) >= 0); + + p = strjoina(top, "/dotdot"); + assert_se(symlink("..", p) >= 0); + + p = strjoina(top, "/dotdota"); + assert_se(symlink("../a", p) >= 0); + + p = strjoina(temp, "/a"); + assert_se(symlink("b", p) >= 0); + + p = strjoina(temp, "/b"); + assert_se(symlink("/usr", p) >= 0); + + p = strjoina(temp, "/start"); + assert_se(symlink("top/dot/dotdota", p) >= 0); + + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r == -ENOENT); + + q = strjoina(temp, "/usr"); + assert_se(mkdir(q, 0700) >= 0); + + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + p = strjoina(temp, "/slash"); + assert_se(symlink("/", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, temp)); + + p = strjoina(temp, "/slashslash"); + assert_se(symlink("///usr///", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", "/etc", &result); + assert_se(r == -EINVAL); + + result = mfree(result); + r = chase_symlinks("/etc/machine-id/foo", NULL, &result); + assert_se(r == -ENOTDIR); + + result = mfree(result); + p = strjoina(temp, "/recursive-symlink"); + assert_se(symlink("recursive-symlink", p) >= 0); + r = chase_symlinks(p, NULL, &result); + assert_se(r == -ELOOP); + + assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); +} + static void test_unlink_noerrno(void) { char name[] = "/tmp/test-close_nointr.XXXXXX"; int fd; @@ -144,6 +237,7 @@ int main(int argc, char *argv[]) { test_readlink_and_make_absolute(); test_get_files_in_directory(); test_var_tmp(); + test_chase_symlinks(); return 0; } diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 03a24620af..c4d4da6d05 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -26,14 +26,18 @@ int main(int argc, char *argv[]) { const char * const writable[] = { "/home", - "/home/lennart/projects/foobar", /* this should be masked automatically */ + "-/home/lennart/projects/foobar", /* this should be masked automatically */ NULL }; const char * const readonly[] = { - "/", - "/usr", + /* "/", */ + /* "/usr", */ "/boot", + "/lib", + "/usr/lib", + "-/lib64", + "-/usr/lib64", NULL };