Systemd/src/basic/fd-util.c

/* SPDX-License-Identifier: LGPL-2.1+ */

#include <errno.h>
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include "alloc-util.h"
#include "copy.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "io-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing_fcntl.h"
#include "missing_syscall.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "tmpfile-util.h"
#include "util.h"

/* The maximum number of iterations in the loop to close descriptors in the fallback case
 * when /proc/self/fd/ is inaccessible. */
#define MAX_FD_LOOP_LIMIT (1024*1024)

int close_nointr(int fd) {
        assert(fd >= 0);

        /*
         * Closes the fd. Returns 0 on success and a negative errno value on failure, with one
         * exception: EINTR is reported as success, since a retry loop is the wrong thing to do on
         * Linux. For background see:
         *
         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
         */
        if (close(fd) < 0 && errno != EINTR)
                return -errno;

        return 0;
}

int safe_close(int fd) {
        PROTECT_ERRNO;

        /*
         * Variant of close_nointr() that cannot fail and leaves errno as it found it. Negative fds are
         * silently accepted. Always returns -1, which permits the following idiom:
         *
         * fd = safe_close(fd);
         */

        if (fd < 0)
                return -1;

        /* close() may report pretty much any error, yet the fd is gone afterwards regardless. The one
         * thing worth catching is being handed an fd that was not valid in the first place. */
        assert_se(close_nointr(fd) != -EBADF);

        return -1;
}

void safe_close_pair(int p[static 2]) {
        assert(p);

        /* Closes both fds of a pair and resets both slots to the value safe_close() returns. */

        if (p[0] != p[1]) {
                p[0] = safe_close(p[0]);
                p[1] = safe_close(p[1]);
                return;
        }

        /* Both slots carry the same fd (a pair used in both directions): close it only once. */
        p[0] = p[1] = safe_close(p[0]);
}

void close_many(const int fds[], size_t n_fd) {
        const int *cursor = fds;
        size_t left = n_fd;

        /* Invokes safe_close() on each of the n_fd entries of fds[], in array order. */

        assert(fds || n_fd <= 0);

        while (left > 0) {
                safe_close(*cursor++);
                left--;
        }
}

int fclose_nointr(FILE *f) {
        assert(f);

        /* The fclose() counterpart of close_nointr(): EINTR is reported as success. */

        /* Reset errno up front: a FILE* that does not wrap an fd might fail without setting errno
         * properly, and we do not want to be misled by whatever value was left in there before. */
        errno = 0;

        if (fclose(f) != 0 && errno != EINTR)
                return errno_or_else(EIO);

        return 0;
}

FILE* safe_fclose(FILE *f) {
        PROTECT_ERRNO;

        /* The fclose() counterpart of safe_close(): accepts NULL and always returns NULL. */

        if (!f)
                return NULL;

        assert_se(fclose_nointr(f) != -EBADF);

        return NULL;
}

DIR* safe_closedir(DIR *d) {
        PROTECT_ERRNO;

        /* The closedir() counterpart of safe_close(): accepts NULL and always returns NULL. Only an
         * EBADF failure is treated as fatal. */

        if (!d)
                return NULL;

        assert_se(closedir(d) >= 0 || errno != EBADF);

        return NULL;
}

int fd_nonblock(int fd, bool nonblock) {
        int current, wanted;

        /* Turns O_NONBLOCK on or off for the fd. Returns 0 on success (including when nothing had to be
         * changed), or a negative errno value if fcntl() fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFL, 0);
        if (current < 0)
                return -errno;

        wanted = UPDATE_FLAG(current, O_NONBLOCK, nonblock);

        /* Only issue F_SETFL if the flag word would actually change. */
        if (wanted != current && fcntl(fd, F_SETFL, wanted) < 0)
                return -errno;

        return 0;
}

int fd_cloexec(int fd, bool cloexec) {
        int current, wanted;

        /* Turns FD_CLOEXEC on or off for the fd. Returns 0 on success (including when nothing had to be
         * changed), or a negative errno value if fcntl() fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFD, 0);
        if (current < 0)
                return -errno;

        wanted = UPDATE_FLAG(current, FD_CLOEXEC, cloexec);

        /* Only issue F_SETFD if the flag word would actually change. */
        if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
                return -errno;

        return 0;
}

_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
        size_t idx = 0;

        /* Linear search: returns true if fd is one of the n_fdset entries of fdset[]. */

        assert(n_fdset == 0 || fdset);

        while (idx < n_fdset) {
                if (fdset[idx] == fd)
                        return true;

                idx++;
        }

        return false;
}

static int get_max_fd(void) {
        struct rlimit rl;
        rlim_t limit;

        /* Determines the highest fd number that may be in use, derived from RLIMIT_NOFILE. The result is
         * never lower than FD_SETSIZE-1 and never higher than INT_MAX. Returns a negative errno value if
         * the limit cannot be queried. */

        if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
                return -errno;

        /* Use whichever of the soft and the hard limit is larger */
        limit = rl.rlim_cur > rl.rlim_max ? rl.rlim_cur : rl.rlim_max;

        /* Always cover at least FD_SETSIZE fds */
        if (limit < FD_SETSIZE)
                return FD_SETSIZE-1;

        /* Saturate on overflow: fds are "int", hence they can never exceed INT_MAX */
        if (limit == RLIM_INFINITY || limit > INT_MAX)
                return INT_MAX;

        return (int) (limit - 1);
}

int close_all_fds(const int except[], size_t n_except) {
        _cleanup_closedir_ DIR *d = NULL;
        struct dirent *de;
        int r = 0;

        /* Closes all file descriptors numbered 3 or higher, except for those listed in except[] (which has
         * n_except entries and may be NULL if n_except is 0).
         *
         * Returns 0 on success or a negative errno-style value. A failing close() does not stop the
         * operation: the first such error other than EBADF is remembered in 'r' and returned after all
         * candidate fds have been processed. */

        assert(n_except == 0 || except);

        d = opendir("/proc/self/fd");
        if (!d) {
                int fd, max_fd;

                /* When /proc isn't available (for example in chroots) the fallback is brute forcing through
                 * the fd table */

                max_fd = get_max_fd();
                if (max_fd < 0)
                        return max_fd;

                /* Refuse to loop over too many elements. It's better to fail immediately than to
                 * spin the CPU for a long time. */
                if (max_fd > MAX_FD_LOOP_LIMIT)
                        return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
                                               "/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
                                               max_fd);

                /* Walk fds 3…max_fd inclusive. Once max_fd has been handled the step expression sets fd to
                 * -1 instead of incrementing it, which ends the loop without ever going past max_fd. */
                for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
                        int q;

                        if (fd_in_set(fd, except, n_except))
                                continue;

                        /* Most of these fds will not be open; EBADF is hence expected and ignored. */
                        q = close_nointr(fd);
                        if (q < 0 && q != -EBADF && r >= 0)
                                r = q;
                }

                return r;
        }

        /* NOTE(review): the third macro argument is presumably the statement executed when reading the
         * directory fails — confirm against the definition of FOREACH_DIRENT. */
        FOREACH_DIRENT(de, d, return -errno) {
                int fd = -1, q;

                if (safe_atoi(de->d_name, &fd) < 0)
                        /* Let's better ignore this, just in case */
                        continue;

                /* Leave stdin/stdout/stderr alone */
                if (fd < 3)
                        continue;

                /* Don't close the directory fd we are currently iterating with */
                if (fd == dirfd(d))
                        continue;

                if (fd_in_set(fd, except, n_except))
                        continue;

                q = close_nointr(fd);
                if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
                        r = q;
        }

        return r;
}

int same_fd(int a, int b) {
        struct stat st_a, st_b;
        int cmp, flags_a, flags_b;
        pid_t self;

        assert(a >= 0);
        assert(b >= 0);

        /* Checks whether two file descriptors are "the same". Returns > 0 if so, 0 if not, and a
         * negative errno value on failure.
         *
         * The precise semantics depend on whether kcmp() is usable: with kcmp() only dup()ed file
         * descriptors compare equal. Without it, two fds that were created by separate open() calls on
         * the same file compare equal, too. As this call is mostly used for filtering out duplicates in
         * the fd store this difference hopefully doesn't matter too much. */

        if (a == b)
                return true;

        /* First choice: let kcmp() decide, if it is available. */
        self = getpid_cached();
        cmp = kcmp(self, self, KCMP_FILE, a, b);
        if (cmp >= 0)
                return cmp == 0;
        if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
                return -errno;

        /* kcmp() is not usable here, fall back to comparing fstat() data. */
        if (fstat(a, &st_a) < 0)
                return -errno;
        if (fstat(b, &st_b) < 0)
                return -errno;

        /* Different file types can never be the same fd */
        if ((st_a.st_mode & S_IFMT) != (st_b.st_mode & S_IFMT))
                return false;

        /* Device fds are always considered different: two of them may refer to quite different device
         * contexts even if inode and backing dev_t are shared. */
        if (S_ISCHR(st_a.st_mode) || S_ISBLK(st_a.st_mode))
                return false;

        if (st_a.st_dev != st_b.st_dev || st_a.st_ino != st_b.st_ino)
                return false;

        /* Same inode. Finally compare the file status flags too, which is useful to tell the read side
         * and the write side of a pipe() pipe apart. */
        flags_a = fcntl(a, F_GETFL);
        if (flags_a < 0)
                return -errno;

        flags_b = fcntl(b, F_GETFL);
        if (flags_b < 0)
                return -errno;

        return flags_a == flags_b;
}

void cmsg_close_all(struct msghdr *mh) {
        struct cmsghdr *cmsg;

        assert(mh);

        CMSG_FOREACH(cmsg, mh)
                if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
                        close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
}

bool fdname_is_valid(const char *s) {
        size_t len = 0;

        /* Checks whether the string is suitable as a name in $LISTEN_FDNAMES. Accepted is any ASCII
         * character that is not a control character, with the exception of ":", which serves as field
         * separator in $LISTEN_FDNAMES.
         *
         * The empty string is explicitly valid, NULL is not. Names may be at most 255 characters long. */

        if (!s)
                return false;

        while (s[len] != '\0') {
                char c = s[len];

                if (c < ' ' || c >= 127 || c == ':')
                        return false;

                len++;
        }

        return len < 256;
}

int fd_get_path(int fd, char **ret) {
        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        int r;

        /* Resolves the /proc/self/fd/ symlink of the specified fd via readlink_malloc(), storing the
         * result in *ret. Returns what readlink_malloc() returns, except that -ENOENT is translated into
         * something more specific. */

        xsprintf(procfs_path, "/proc/self/fd/%i", fd);

        r = readlink_malloc(procfs_path, ret);
        if (r != -ENOENT)
                return r;

        /* ENOENT is ambiguous: either the fd doesn't exist, or /proc isn't mounted. Tell the two cases
         * apart to keep things debuggable. */

        if (access("/proc/self/fd/", F_OK) >= 0)
                return -EBADF; /* The directory is there, hence it must be the fd that is missing. */

        /* /proc is unavailable or not set up properly, most likely we are in some chroot environment. */
        return errno == ENOENT ? -EOPNOTSUPP : -errno;
}

int move_fd(int from, int to, int cloexec) {
        int r;

        /* Moves the fd 'from' to the fd number 'to' and releases the old fd. 'cloexec' controls
         * FD_CLOEXEC on the result: -1 inherits the setting of 'from', 0 turns it off, > 0 turns it on.
         * Returns 'to' on success, a negative errno value otherwise. */

        if (from < 0 || to < 0)
                return -EBADF;

        if (from == to) {
                /* Nothing to move, at most the flag needs adjusting */
                if (cloexec < 0)
                        return to;

                r = fd_cloexec(to, cloexec);
                return r < 0 ? r : to;
        }

        if (cloexec < 0) {
                /* Inherit: look up what the source fd currently has set */
                int fdflags = fcntl(from, F_GETFD, 0);

                if (fdflags < 0)
                        return -errno;

                cloexec = (fdflags & FD_CLOEXEC) != 0;
        }

        r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
        if (r < 0)
                return -errno;

        assert(r == to);

        safe_close(from);

        return to;
}

int acquire_data_fd(const void *data, size_t size, unsigned flags) {

        _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
        char pattern[] = "/dev/shm/data-fd-XXXXXX";
        _cleanup_close_ int fd = -1;
        int isz = 0, r;
        ssize_t n;
        off_t f;

        assert(data || size == 0);

        /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
         * complex than I wish it was. But here's why:
         *
         * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
         *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
         *
         * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
         *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
         *    clients can only bump their size to a system-wide limit, which might be quite low.
         *
         * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
         *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
         *    /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
         *
         * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
         *
         * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
         * figure.
         *
         * Each of the ACQUIRE_NO_* bits in 'flags' disables the corresponding strategy. Returns the new fd on
         * success, or a negative errno-style value on failure; -EOPNOTSUPP is returned if we run out of
         * strategies to try.
         *
         * NOTE(review): 'fd' and 'pipefds' carry cleanup attributes and are presumably closed automatically on
         * every return path unless handed out via TAKE_FD() — confirm against the macro definitions. */

        if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
                /* As a special case, return /dev/null if we have been called for an empty data block */
                r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (r < 0)
                        return -errno;

                return r;
        }

        /* Strategy a): memfd */
        if ((flags & ACQUIRE_NO_MEMFD) == 0) {
                fd = memfd_new("data-fd");
                if (fd < 0)
                        goto try_pipe;

                /* A short write is treated as an error, we do not retry */
                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Rewind, so that the receiver starts reading at the beginning */
                f = lseek(fd, 0, SEEK_SET);
                if (f != 0)
                        return -errno;

                r = memfd_set_sealed(fd);
                if (r < 0)
                        return r;

                return TAKE_FD(fd);
        }

try_pipe:
        /* Strategy b): pipe */
        if ((flags & ACQUIRE_NO_PIPE) == 0) {
                if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
                        return -errno;

                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                if (isz < 0)
                        return -errno;

                if ((size_t) isz < size) {
                        /* The pipe size is an int; refuse data sizes that cannot be represented as one */
                        isz = (int) size;
                        if (isz < 0 || (size_t) isz != size)
                                return -E2BIG;

                        /* Try to bump the pipe size */
                        (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);

                        /* See if that worked */
                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                        if (isz < 0)
                                return -errno;

                        if ((size_t) isz < size)
                                goto try_dev_shm;
                }

                n = write(pipefds[1], data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* The pipe was created with O_NONBLOCK; hand out the reading side in blocking mode (best effort) */
                (void) fd_nonblock(pipefds[0], false);

                return TAKE_FD(pipefds[0]);
        }

try_dev_shm:
        /* Strategy c): O_TMPFILE in /dev/shm */
        if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
                fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
                if (fd < 0)
                        goto try_dev_shm_without_o_tmpfile;

                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
        }

try_dev_shm_without_o_tmpfile:
        /* Strategy d): regular file in /dev/shm, unlinked again before we return */
        if ((flags & ACQUIRE_NO_REGULAR) == 0) {
                fd = mkostemp_safe(pattern);
                if (fd < 0)
                        return fd;

                n = write(fd, data, size);
                if (n < 0) {
                        r = -errno;
                        goto unlink_and_return;
                }
                if ((size_t) n != size) {
                        r = -EIO;
                        goto unlink_and_return;
                }

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                r = open(pattern, O_RDONLY|O_CLOEXEC);
                if (r < 0)
                        r = -errno;

        unlink_and_return:
                /* Remove the file name both on success and on failure; 'r' is either the new fd or the error */
                (void) unlink(pattern);
                return r;
        }

        return -EOPNOTSUPP;
}

/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
#define DATA_FD_MEMORY_LIMIT (64U*1024U)

/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that use /var/tmp instead. */
#define DATA_FD_TMP_LIMIT (1024U*1024U)

int fd_duplicate_data_fd(int fd) {

        _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
        _cleanup_free_ void *remains = NULL;
        size_t remains_size = 0;
        const char *td;
        struct stat st;
        int r;

        /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
         * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
         * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
         * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
         * /var/tmp.
         *
         * Returns the new fd on success, or a negative errno-style value on failure.
         *
         * Throughout this function 'copy_fd' refers to the backing object of a previous attempt that turned out
         * to be too small (and whose contents hence need to be carried over into the next one), and 'remains'
         * holds data already read from the source but not yet written anywhere.
         *
         * NOTE(review): a positive return value of copy_bytes()/copy_bytes_full() is treated below as "size
         * limit hit before EOF" — confirm against their definitions. */

        if (fstat(fd, &st) < 0)
                return -errno;

        /* For now, let's only accept regular files, sockets, pipes and char devices */
        if (S_ISDIR(st.st_mode))
                return -EISDIR;
        if (S_ISLNK(st.st_mode))
                return -ELOOP;
        if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
                return -EBADFD;

        /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
         * that we use the reported regular file size only as a hint, given that there are plenty special files in
         * /proc and /sys which report a zero file size but can be read from. */

        if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {

                /* Try a memfd first */
                copy_fd = memfd_new("data-fd");
                if (copy_fd >= 0) {
                        off_t f;

                        r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
                        if (r < 0)
                                return r;

                        /* Rewind in either case: the memfd is either handed out or read back from below */
                        f = lseek(copy_fd, 0, SEEK_SET);
                        if (f != 0)
                                return -errno;

                        if (r == 0) {
                                /* Did it fit into the limit? If so, we are done. */
                                r = memfd_set_sealed(copy_fd);
                                if (r < 0)
                                        return r;

                                return TAKE_FD(copy_fd);
                        }

                        /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */

                } else {
                        _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
                        int isz;

                        /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
                         * than block indefinitely when we hit the pipe size limit */

                        if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
                                return -errno;

                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                        if (isz < 0)
                                return -errno;

                        /* Try to enlarge the pipe size if necessary */
                        if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {

                                (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);

                                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                                if (isz < 0)
                                        return -errno;
                        }

                        /* Only use the pipe if it is large enough; otherwise go straight for the temporary files */
                        if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {

                                r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
                                if (r < 0 && r != -EAGAIN)
                                        return r; /* If we get EAGAIN it could be because of the source or because of
                                                   * the destination fd, we can't know, as sendfile() and friends won't
                                                   * tell us. Hence, treat this as reason to fall back, just to be
                                                   * sure. */
                                if (r == 0) {
                                        /* Everything fit in, yay! */
                                        (void) fd_nonblock(pipefds[0], false);

                                        return TAKE_FD(pipefds[0]);
                                }

                                /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
                                 * when writing the new file we incorporate this first. */
                                copy_fd = TAKE_FD(pipefds[0]);
                        }
                }
        }

        /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
        if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
            (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
                off_t f;

                tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
                if (tmp_fd < 0)
                        return tmp_fd;

                if (copy_fd >= 0) {
                        /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
                         * temporary file first. */

                        r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
                        if (r < 0)
                                return r;

                        assert(r == 0);
                }

                if (remains_size > 0) {
                        /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
                         * failed copy operation, let's flush them out next. */

                        r = loop_write(tmp_fd, remains, remains_size, false);
                        if (r < 0)
                                return r;
                }

                /* Copy more from the source, up to what is left of the /tmp budget after the memory-sized part
                 * and the remaining bytes written above */
                r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
                if (r < 0)
                        return r;
                if (r == 0)
                        goto finish;  /* Yay, it fit in */

                /* It didn't fit in. Let's not forget to use what we already used */
                f = lseek(tmp_fd, 0, SEEK_SET);
                if (f != 0)
                        return -errno;

                /* The /tmp file now takes over the role of the "previous attempt" whose contents get carried over */
                safe_close(copy_fd);
                copy_fd = TAKE_FD(tmp_fd);

                remains = mfree(remains);
                remains_size = 0;
        }

        /* As last fallback use /var/tmp */
        r = var_tmp_dir(&td);
        if (r < 0)
                return r;

        tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
        if (tmp_fd < 0)
                return tmp_fd;

        if (copy_fd >= 0) {
                /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
                 * into the temporary file first. */
                r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
                if (r < 0)
                        return r;

                assert(r == 0);
        }

        if (remains_size > 0) {
                /* Then, copy in any read but not yet written bytes. */
                r = loop_write(tmp_fd, remains, remains_size, false);
                if (r < 0)
                        return r;
        }

        /* Copy in the rest */
        r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
        if (r < 0)
                return r;

        assert(r == 0);

finish:
        /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
         * file again) */

        return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
}

int fd_move_above_stdio(int fd) {
        int fdflags, copy;
        PROTECT_ERRNO;

        /* Relocates the specified fd out of the stdin/stdout/stderr range [0…2], if possible. If that is not
         * possible the original fd is returned unmodified. Intended for long-lasting fds we allocate in our
         * own code that might get loaded into foreign code, where we want to make it unlikely that they
         * accidentally end up being used as stdin/stdout/stderr by unrelated code.
         *
         * This doesn't fix any real bugs. It merely makes it less likely that we are affected by buggy code
         * of others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has been
         * closed before.
         *
         * The implementation is "best-effort" and "least-impact": on any error the original fd is returned,
         * and errno is never touched. */

        if (fd < 0 || fd > 2)
                return fd;

        fdflags = fcntl(fd, F_GETFD, 0);
        if (fdflags < 0)
                return fd;

        /* Duplicate to the lowest free fd >= 3, carrying over the close-on-exec setting of the original */
        copy = fcntl(fd, (fdflags & FD_CLOEXEC) ? F_DUPFD_CLOEXEC : F_DUPFD, 3);
        if (copy < 0)
                return fd;

        assert(copy > 2);

        (void) close(fd);
        return copy;
}

int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {

        int fd[3] = { /* Put together an array of fds we work on */
                original_input_fd,
                original_output_fd,
                original_error_fd
        };

        int r, i,
                null_fd = -1,                /* if we open /dev/null, we store the fd to it here */
                copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
        bool null_readable, null_writable;

        /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
         * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
         * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
         * on.
         *
         * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
         * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
         *
         * Note that when this function fails stdin/stdout/stderr might remain half set up!
         *
         * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
         * stdin/stdout/stderr).
         *
         * Returns 0 on success, or a negative errno-style value on failure. */

        /* /dev/null needs to be readable if it stands in for stdin, and writable if it stands in for stdout or
         * stderr */
        null_readable = original_input_fd < 0;
        null_writable = original_output_fd < 0 || original_error_fd < 0;

        /* First step, open /dev/null once, if we need it */
        if (null_readable || null_writable) {

                /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
                null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
                                             null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
                if (null_fd < 0) {
                        r = -errno;
                        goto finish;
                }

                /* If this fd is in the 0…2 range, let's move it out of it */
                if (null_fd < 3) {
                        int copy;

                        copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                        if (copy < 0) {
                                r = -errno;
                                goto finish;
                        }

                        safe_close(null_fd);
                        null_fd = copy;
                }
        }

        /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
        for (i = 0; i < 3; i++) {

                if (fd[i] < 0)
                        fd[i] = null_fd;        /* A negative parameter means: connect this one to /dev/null */
                else if (fd[i] != i && fd[i] < 3) {
                        /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
                        copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                        if (copy_fd[i] < 0) {
                                r = -errno;
                                goto finish;
                        }

                        fd[i] = copy_fd[i];
                }
        }

        /* At this point we now have the fds to use in fd[]. Each entry is either already at its final place
         * (fd[i] == i), or refers to an fd above the stdio range, so that we have freedom to move them
         * around. Let's now move them to the right places. This is the point of no return. */
        for (i = 0; i < 3; i++) {

                if (fd[i] == i) {

                        /* fd is already in place, but let's make sure O_CLOEXEC is off */
                        r = fd_cloexec(i, false);
                        if (r < 0)
                                goto finish;

                } else {
                        assert(fd[i] > 2);

                        if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
                                r = -errno;
                                goto finish;
                        }
                }
        }

        r = 0;

finish:
        /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
         * fd passed in multiple times.
         *
         * NOTE(review): safe_close_above_stdio() is not defined in this file; presumably it is a no-op for
         * fds < 3 (including negative ones) — confirm against its definition. */
        safe_close_above_stdio(original_input_fd);
        if (original_output_fd != original_input_fd)
                safe_close_above_stdio(original_output_fd);
        if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
                safe_close_above_stdio(original_error_fd);

        /* Close the copies we moved > 2 */
        for (i = 0; i < 3; i++)
                safe_close(copy_fd[i]);

        /* Close our null fd, if it's > 2 */
        safe_close_above_stdio(null_fd);

        return r;
}

int fd_reopen(int fd, int flags) {
        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        int reopened;

        /* Opens the file behind 'fd' a second time via its /proc/self/fd/ entry, applying the open flags
         * given in 'flags'. Typical uses are converting an O_PATH fd into a regular one, or turning an
         * O_RDWR fd into an O_RDONLY one.
         *
         * Sockets cannot be reopened this way, since they can never be open()ed.
         *
         * As a side effect the file read index of the returned fd starts at 0.
         *
         * Returns the new fd on success, or a negative errno-style error code on failure. */

        xsprintf(procfs_path, "/proc/self/fd/%i", fd);

        reopened = open(procfs_path, flags);
        if (reopened >= 0)
                return reopened;

        /* Any failure other than ENOENT is passed through unmodified. */
        if (errno != ENOENT)
                return -errno;

        /* ENOENT is ambiguous: if /proc/ is not mounted at all the concept is not implementable, which
         * we report as ENOSYS. Otherwise the ENOENT is propagated as-is. */
        return proc_mounted() == 0 ? -ENOSYS : -ENOENT;
}

int read_nr_open(void) {
        _cleanup_free_ char *nr_open = NULL;
        int r;

        /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the
         * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */

        r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
        if (r < 0)
                log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
        else {
                int v;

                r = safe_atoi(nr_open, &v);
                if (r < 0)
                        log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
                else
                        return v;
        }

        /* If we fail, fall back to the hard-coded kernel limit of 1024 * 1024. */
        return 1024 * 1024;
}
-												Add SPDX license identifiers to source files under the LGPL

This follows what the kernel is doing, c.f.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5fd54ace4721fc5ce2bb5aef6318fcf17f421460.

											
										
										
											2017-11-18 17:09:20 +01:00
+								/* SPDX-License-Identifier: LGPL-2.1+ */
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include <errno.h>
 								#include <fcntl.h>
 								#include <sys/resource.h>
 								#include <sys/stat.h>
 								#include <unistd.h>
-												fd-util: add new helper call fd_duplicate_data_fd()

This call creates an fd from another fd containing the same data.
Specifically, repeated read() on the returned fd should return the same
data as the original fd. This call is useful when we want to copy data
out of disk images and suchlike, and want to be able to pass fds with the data
around without having to keep the disk image continuously mounted.

The implementation tries to be somewhat smart and tries to prefer
memfds/pipes over files in /tmp or /var/tmp based on the size of the
data, but has appropriate fallbacks in place.

											
										
										
											2018-03-09 22:45:08 +01:00
+								#include "alloc-util.h"
 								#include "copy.h"
-												tree-wide: replace all readdir cycles with FOREACH_DIRENT{,_ALL} (#4853)


											
										
										
											2016-12-09 10:04:30 +01:00
+								#include "dirent-util.h"
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								#include "fd-util.h"
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
+								#include "fileio.h"
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
+								#include "fs-util.h"
-												fd-util: add new helper call fd_duplicate_data_fd()

This call creates an fd from another fd containing the same data.
Specifically, repeated read() on the returned fd should return the same
data as the original fd. This call is useful when we want to copy data
out of disk images and suchlike, and want to be able to pass fds with the data
around without having to keep the disk image continuously mounted.

The implementation tries to be somewhat smart and tries to prefer
memfds/pipes over files in /tmp or /var/tmp based on the size of the
data, but has appropriate fallbacks in place.

											
										
										
											2018-03-09 22:45:08 +01:00
+								#include "io-util.h"
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include "macro.h"
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
+								#include "memfd-util.h"
-												include missing_fcntl.h where needed

f5947a5e925117c55b390460d592f57504277bf9 dropped missing.h and
replaced with the more specific headers but did not add
missing_fcntl.h in places that use O_TMPFILE. This is needed for
some older versions of glibc.

											
										
										
											2019-11-07 07:25:43 +01:00
+								#include "missing_fcntl.h"
-												tree-wide: drop missing.h

											
										
										
											2019-10-31 03:07:23 +01:00
+								#include "missing_syscall.h"
-												basic: re-sort includes

My previous patch to only include what we use accidentally placed
the added includes in non-sorted order.

											
										
										
											2015-12-01 23:22:03 +01:00
+								#include "parse-util.h"
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include "path-util.h"
-												tree-wide: make use of getpid_cached() wherever we can

This moves pretty much all uses of getpid() over to getpid_raw(). I
didn't specifically check whether the optimization is worth it for each
replacement, but in order to keep things simple and systematic I
switched over everything at once.

											
										
										
											2017-07-20 16:19:18 +02:00
+								#include "process-util.h"
-												basic: re-sort includes

My previous patch to only include what we use accidentally placed
the added includes in non-sorted order.

											
										
										
											2015-12-01 23:22:03 +01:00
+								#include "socket-util.h"
-												tmpfiles: if we get ENOENT when opening /proc/self/fd/, check if /proc is mounted

Let's return ENOSYS in that case, to make things a bit less confusing.

Previously we'd just propagate ENOENT, which people might mistake as
applying to the object being modified rather than /proc/ just not being
there.

Let's return ENOSYS instead, i.e. an error clearly indicating that some
kernel API is not available. This hopefully should put people on a
better track.

Note that we only do the procfs check in the error path, which hopefully
means it's the less likely path.

We probably can add similar bits to more suitable codepaths dealing with
/proc/self/fd, but for now, let's pick to the ones noticed in #14745.

Fixes: #14745

											
										
										
											2020-04-23 14:52:10 +02:00
+								#include "stat-util.h"
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
+								#include "stdio-util.h"
-												util-lib: split out all temporary file related calls into tmpfiles-util.c

This splits out a bunch of functions from fileio.c that have to do with
temporary files. Simply to make the header files a bit shorter, and to
group things more nicely.

No code changes, just some rearranging of source files.

											
										
										
											2018-11-30 21:05:27 +01:00
+								#include "tmpfile-util.h"
-												tmpfiles: if we get ENOENT when opening /proc/self/fd/, check if /proc is mounted

Let's return ENOSYS in that case, to make things a bit less confusing.

Previously we'd just propagate ENOENT, which people might mistake as
applying to the object being modified rather than /proc/ just not being
there.

Let's return ENOSYS instead, i.e. an error clearly indicating that some
kernel API is not available. This hopefully should put people on a
better track.

Note that we only do the procfs check in the error path, which hopefully
means it's the less likely path.

We probably can add similar bits to more suitable codepaths dealing with
/proc/self/fd, but for now, let's pick to the ones noticed in #14745.

Fixes: #14745

											
										
										
											2020-04-23 14:52:10 +02:00
+								#include "util.h"
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
-												basic/fd-util: refuse "infinite" loop in close_all_fds()

I had a test machine with ulimit -n set to 1073741816 through pam
("session required pam_limits.so set_all", which copies the limits from PID 1,
left over from testing of #10921).

test-execute would "hang" and then fail with a timeout when running
exec-inaccessiblepaths-proc.service. It turns out that the problem was in
close_all_fds(), which would go to the fallback path of doing close()
1073741813 times. Let's just fail if we hit this case. This only matters
for cases where both /proc is inaccessible, and the *soft* limit has been
raised.

  (gdb) bt
  #0  0x00007f7e2e73fdc8 in close () from target:/lib64/libc.so.6
  #1  0x00007f7e2e42cdfd in close_nointr ()
     from target:/home/zbyszek/src/systemd-work3/build-rawhide/src/shared/libsystemd-shared-241.so
  #2  0x00007f7e2e42d525 in close_all_fds ()
     from target:/home/zbyszek/src/systemd-work3/build-rawhide/src/shared/libsystemd-shared-241.so
  #3  0x0000000000426e53 in exec_child ()
  #4  0x0000000000429578 in exec_spawn ()
  #5  0x00000000004ce1ab in service_spawn ()
  #6  0x00000000004cff77 in service_enter_start ()
  #7  0x00000000004d028f in service_enter_start_pre ()
  #8  0x00000000004d16f2 in service_start ()
  #9  0x00000000004568f4 in unit_start ()
  #10 0x0000000000416987 in test ()
  #11 0x0000000000417632 in test_exec_inaccessiblepaths ()
  #12 0x0000000000419362 in run_tests ()
  #13 0x0000000000419632 in main ()

											
										
										
											2019-03-15 15:13:25 +01:00
+								/* The maximum number of iterations in the loop to close descriptors in the fallback case
 								 * when /proc/self/fd/ is inaccessible. */
 								#define MAX_FD_LOOP_LIMIT (1024*1024)
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								int close_nointr(int fd) {
 								        assert(fd >= 0);
 								        if (close(fd) >= 0)
 								                return 0;
 								        /*
 								         * Just ignore EINTR; a retry loop is the wrong thing to do on
 								         * Linux.
 								         *
 								         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
 								         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
 								         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
 								         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
 								         */
 								        if (errno == EINTR)
 								                return 0;
 								        return -errno;
 								}
 								int safe_close(int fd) {
 								        /*
 								         * Like close_nointr() but cannot fail. Guarantees errno is
 								         * unchanged. Is a NOP with negative fds passed, and returns
 								         * -1, so that it can be used in this syntax:
 								         *
 								         * fd = safe_close(fd);
 								         */
 								        if (fd >= 0) {
 								                PROTECT_ERRNO;
 								                /* The kernel might return pretty much any error code
 								                 * via close(), but the fd will be closed anyway. The
 								                 * only condition we want to check for here is whether
 								                 * the fd was invalid at all... */
 								                assert_se(close_nointr(fd) != -EBADF);
 								        }
 								        return -1;
 								}
-												tree-wide: use c99 static for array size declarations

https://hamberg.no/erlend/posts/2013-02-18-static-array-indices.html

This only works with clang, unfortunately gcc doesn't seem to implement the check
(tested with gcc-8.2.1-5.fc29.x86_64).

Simulated error:
[2/3] Compiling C object 'systemd-nspawn@exe/src_nspawn_nspawn.c.o'.
../src/nspawn/nspawn.c:3179:45: warning: array argument is too small; contains 15 elements, callee requires at least 16 [-Warray-bounds]
                        candidate = (uid_t) siphash24(arg_machine, strlen(arg_machine), hash_key);
                                            ^                                           ~~~~~~~~
../src/basic/siphash24.h:24:64: note: callee declares array parameter as static here
uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]);
                                                               ^~~~~~~~~~~~

											
										
										
											2019-01-04 12:30:45 +01:00
+								void safe_close_pair(int p[static 2]) {
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        assert(p);
 								        if (p[0] == p[1]) {
 								                /* Special case pairs which use the same fd in both
 								                 * directions... */
 								                p[0] = p[1] = safe_close(p[0]);
 								                return;
 								        }
 								        p[0] = safe_close(p[0]);
 								        p[1] = safe_close(p[1]);
 								}
-												tree-wide: be more careful with the type of array sizes

Previously we were a bit sloppy with the index and size types of arrays,
we'd regularly use unsigned. While I don't think this ever resulted in
real issues I think we should be more careful there and follow a
stricter regime: unless there's a strong reason not to use size_t for
array sizes and indexes, size_t it should be. Any allocations we do
ultimately will use size_t anyway, and converting forth and back between
unsigned and size_t will always be a source of problems.

Note that on 32bit machines "unsigned" and "size_t" are equivalent, and
on 64bit machines our arrays shouldn't grow that large anyway, and if
they do we have a problem, however that kind of overly large allocation
we have protections for usually, but for overflows we do not have that
so much, hence let's add it.

So yeah, it's a story of the current code being already "good enough",
but I think some extra type hygiene is better.

This patch tries to be comprehensive, but it probably isn't and I missed
a few cases. But I guess we can cover that later as we notice it. Among
smaller fixes, this changes:

1. strv_length()' return type becomes size_t

2. the unit file changes array size becomes size_t

3. DNS answer and query array sizes become size_t

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76745

											
										
										
											2018-04-27 14:09:31 +02:00
+								void close_many(const int fds[], size_t n_fd) {
 								        size_t i;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
 								        assert(fds || n_fd <= 0);
 								        for (i = 0; i < n_fd; i++)
 								                safe_close(fds[i]);
 								}
 								int fclose_nointr(FILE *f) {
 								        assert(f);
 								        /* Same as close_nointr(), but for fclose() */
-												fd-util: be more careful with fclose() errnos

This might fix #15859, a bug which I find very puzzling.

											
										
										
											2020-06-02 10:39:25 +02:00
+								        errno = 0; /* Extra safety: if the FILE* object is not encapsulating an fd, it might not set errno
 								                    * correctly. Let's hence initialize it to zero first, so that we aren't confused by any
 								                    * prior errno here */
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        if (fclose(f) == 0)
 								                return 0;
 								        if (errno == EINTR)
 								                return 0;
-												fd-util: be more careful with fclose() errnos

This might fix #15859, a bug which I find very puzzling.

											
										
										
											2020-06-02 10:39:25 +02:00
+								        return errno_or_else(EIO);
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								}
 								FILE* safe_fclose(FILE *f) {
 								        /* Same as safe_close(), but for fclose() */
 								        if (f) {
 								                PROTECT_ERRNO;
-												fd-util: Fix error handling in safe_fclose

Function fclose_nointr returns negative value on error.

											
										
										
											2018-11-30 12:35:23 +01:00
+								                assert_se(fclose_nointr(f) != -EBADF);
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        }
 								        return NULL;
 								}
 								DIR* safe_closedir(DIR *d) {
 								        if (d) {
 								                PROTECT_ERRNO;
 								                assert_se(closedir(d) >= 0 || errno != EBADF);
 								        }
 								        return NULL;
 								}
 								int fd_nonblock(int fd, bool nonblock) {
 								        int flags, nflags;
 								        assert(fd >= 0);
 								        flags = fcntl(fd, F_GETFL, 0);
 								        if (flags < 0)
 								                return -errno;
-												Add yet another tiny helper to manipulate flags

											
										
										
											2020-04-09 14:24:11 +02:00
+								        nflags = UPDATE_FLAG(flags, O_NONBLOCK, nonblock);
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        if (nflags == flags)
 								                return 0;
 								        if (fcntl(fd, F_SETFL, nflags) < 0)
 								                return -errno;
 								        return 0;
 								}
 								int fd_cloexec(int fd, bool cloexec) {
 								        int flags, nflags;
 								        assert(fd >= 0);
 								        flags = fcntl(fd, F_GETFD, 0);
 								        if (flags < 0)
 								                return -errno;
-												Add yet another tiny helper to manipulate flags

											
										
										
											2020-04-09 14:24:11 +02:00
+								        nflags = UPDATE_FLAG(flags, FD_CLOEXEC, cloexec);
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        if (nflags == flags)
 								                return 0;
 								        if (fcntl(fd, F_SETFD, nflags) < 0)
 								                return -errno;
 								        return 0;
 								}
-												tree-wide: be more careful with the type of array sizes

Previously we were a bit sloppy with the index and size types of arrays,
we'd regularly use unsigned. While I don't think this ever resulted in
real issues I think we should be more careful there and follow a
stricter regime: unless there's a strong reason not to use size_t for
array sizes and indexes, size_t it should be. Any allocations we do
ultimately will use size_t anyway, and converting forth and back between
unsigned and size_t will always be a source of problems.

Note that on 32bit machines "unsigned" and "size_t" are equivalent, and
on 64bit machines our arrays shouldn't grow that large anyway, and if
they do we have a problem, however that kind of overly large allocation
we have protections for usually, but for overflows we do not have that
so much, hence let's add it.

So yeah, it's a story of the current code being already "good enough",
but I think some extra type hygiene is better.

This patch tries to be comprehensive, but it probably isn't and I missed
a few cases. But I guess we can cover that later as we notice it. Among
smaller fixes, this changes:

1. strv_length()' return type becomes size_t

2. the unit file changes array size becomes size_t

3. DNS answer and query array sizes become size_t

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76745

											
										
										
											2018-04-27 14:09:31 +02:00
+								_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
 								        size_t i;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
 								        assert(n_fdset == 0 || fdset);
 								        for (i = 0; i < n_fdset; i++)
 								                if (fdset[i] == fd)
 								                        return true;
 								        return false;
 								}
-												fd-util: rework how we determine highest possible fd

											
										
										
											2019-01-17 12:23:21 +01:00
+								static int get_max_fd(void) {
 								        struct rlimit rl;
 								        rlim_t m;
 								        /* Return the highest possible fd, based RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary
 								         * and INT_MAX as upper boundary. */
 								        if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
 								                return -errno;
 								        m = MAX(rl.rlim_cur, rl.rlim_max);
 								        if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */
 								                return FD_SETSIZE-1;
 								        if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can
 								                                                * never be above INT_MAX */
 								                return INT_MAX;
 								        return (int) (m - 1);
 								}
-												tree-wide: be more careful with the type of array sizes

Previously we were a bit sloppy with the index and size types of arrays,
we'd regularly use unsigned. While I don't think this ever resulted in
real issues I think we should be more careful there and follow a
stricter regime: unless there's a strong reason not to use size_t for
array sizes and indexes, size_t it should be. Any allocations we do
ultimately will use size_t anyway, and converting forth and back between
unsigned and size_t will always be a source of problems.

Note that on 32bit machines "unsigned" and "size_t" are equivalent, and
on 64bit machines our arrays shouldn't grow that large anyway, and if
they do we have a problem, however that kind of overly large allocation
we have protections for usually, but for overflows we do not have that
so much, hence let's add it.

So yeah, it's a story of the current code being already "good enough",
but I think some extra type hygiene is better.

This patch tries to be comprehensive, but it probably isn't and I missed
a few cases. But I guess we can cover that later as we notice it. Among
smaller fixes, this changes:

1. strv_length()' return type becomes size_t

2. the unit file changes array size becomes size_t

3. DNS answer and query array sizes become size_t

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76745

											
										
										
											2018-04-27 14:09:31 +02:00
+								int close_all_fds(const int except[], size_t n_except) {
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        _cleanup_closedir_ DIR *d = NULL;
 								        struct dirent *de;
 								        int r = 0;
 								        assert(n_except == 0 || except);
 								        d = opendir("/proc/self/fd");
 								        if (!d) {
-												basic: be more careful when closing fds based on RLIMIT_NOFILE

Let's make sure we properly handle cases where RLIMIT_NOFILE is set to
infinity, zero or values outside of the "int" range.

											
										
										
											2018-05-07 17:54:59 +02:00
+								                int fd, max_fd;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
-												fd-util: rework how we determine highest possible fd

											
										
										
											2019-01-17 12:23:21 +01:00
+								                /* When /proc isn't available (for example in chroots) the fallback is brute forcing through
 								                 * the fd table */
-												basic: be more careful when closing fds based on RLIMIT_NOFILE

Let's make sure we properly handle cases where RLIMIT_NOFILE is set to
infinity, zero or values outside of the "int" range.

											
										
										
											2018-05-07 17:54:59 +02:00
-												fd-util: rework how we determine highest possible fd

											
										
										
											2019-01-17 12:23:21 +01:00
+								                max_fd = get_max_fd();
 								                if (max_fd < 0)
 								                        return max_fd;
-												basic: be more careful when closing fds based on RLIMIT_NOFILE

Let's make sure we properly handle cases where RLIMIT_NOFILE is set to
infinity, zero or values outside of the "int" range.

											
										
										
											2018-05-07 17:54:59 +02:00
-												basic/fd-util: refuse "infinite" loop in close_all_fds()

I had a test machine with ulimit -n set to 1073741816 through pam
("session required pam_limits.so set_all", which copies the limits from PID 1,
left over from testing of #10921).

test-execute would "hang" and then fail with a timeout when running
exec-inaccessiblepaths-proc.service. It turns out that the problem was in
close_all_fds(), which would go to the fallback path of doing close()
1073741813 times. Let's just fail if we hit this case. This only matters
for cases where both /proc is inaccessible, and the *soft* limit has been
raised.

  (gdb) bt
  #0  0x00007f7e2e73fdc8 in close () from target:/lib64/libc.so.6
  #1  0x00007f7e2e42cdfd in close_nointr ()
     from target:/home/zbyszek/src/systemd-work3/build-rawhide/src/shared/libsystemd-shared-241.so
  #2  0x00007f7e2e42d525 in close_all_fds ()
     from target:/home/zbyszek/src/systemd-work3/build-rawhide/src/shared/libsystemd-shared-241.so
  #3  0x0000000000426e53 in exec_child ()
  #4  0x0000000000429578 in exec_spawn ()
  #5  0x00000000004ce1ab in service_spawn ()
  #6  0x00000000004cff77 in service_enter_start ()
  #7  0x00000000004d028f in service_enter_start_pre ()
  #8  0x00000000004d16f2 in service_start ()
  #9  0x00000000004568f4 in unit_start ()
  #10 0x0000000000416987 in test ()
  #11 0x0000000000417632 in test_exec_inaccessiblepaths ()
  #12 0x0000000000419362 in run_tests ()
  #13 0x0000000000419632 in main ()

											
										
										
											2019-03-15 15:13:25 +01:00
+								                /* Refuse to do the loop over more too many elements. It's better to fail immediately than to
 								                 * spin the CPU for a long time. */
 								                if (max_fd > MAX_FD_LOOP_LIMIT)
 								                        return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
 								                                               "/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
 								                                               max_fd);
-												basic: be more careful when closing fds based on RLIMIT_NOFILE

Let's make sure we properly handle cases where RLIMIT_NOFILE is set to
infinity, zero or values outside of the "int" range.

											
										
										
											2018-05-07 17:54:59 +02:00
+								                for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
-												fd-util: use close_nointr() return value instead of errno

Our own calls return errors in their return values, hence use that
rather than errno when checking errors.

											
										
										
											2017-12-22 13:04:24 +01:00
+								                        int q;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
 								                        if (fd_in_set(fd, except, n_except))
 								                                continue;
-												fd-util: use close_nointr() return value instead of errno

Our own calls return errors in their return values, hence use that
rather than errno when checking errors.

											
										
										
											2017-12-22 13:04:24 +01:00
+								                        q = close_nointr(fd);
 								                        if (q < 0 && q != -EBADF && r >= 0)
 								                                r = q;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								                }
 								                return r;
 								        }
-												tree-wide: replace all readdir cycles with FOREACH_DIRENT{,_ALL} (#4853)


											
										
										
											2016-12-09 10:04:30 +01:00
+								        FOREACH_DIRENT(de, d, return -errno) {
-												fd-util: use close_nointr() return value instead of errno

Our own calls return errors in their return values, hence use that
rather than errno when checking errors.

											
										
										
											2017-12-22 13:04:24 +01:00
+								                int fd = -1, q;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
 								                if (safe_atoi(de->d_name, &fd) < 0)
 								                        /* Let's better ignore this, just in case */
 								                        continue;
 								                if (fd < 3)
 								                        continue;
 								                if (fd == dirfd(d))
 								                        continue;
 								                if (fd_in_set(fd, except, n_except))
 								                        continue;
-												fd-util: use close_nointr() return value instead of errno

Our own calls return errors in their return values, hence use that
rather than errno when checking errors.

											
										
										
											2017-12-22 13:04:24 +01:00
+								                q = close_nointr(fd);
 								                if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
 								                        r = q;
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        }
 								        return r;
 								}
 								/* Checks whether the file descriptors a and b refer to the same thing. Returns true (> 0) if they do,
 								 * false (0) if they do not, and a negative errno-style value when kcmp() fails with an error other
 								 * than ENOSYS/EACCES/EPERM, or when one of the fstat()/fcntl() calls of the fallback path fails.
 								 * Both fds must be >= 0 (asserted). */
 								int same_fd(int a, int b) {
 								        struct stat sta, stb;
 								        pid_t pid;
 								        int r, fa, fb;
 								        assert(a >= 0);
 								        assert(b >= 0);
 								        /* Compares two file descriptors. Note that semantics are
 								         * quite different depending on whether we have kcmp() or we
 								         * don't. If we have kcmp() this will only return true for
 								         * dup()ed file descriptors, but not otherwise. If we don't
 								         * have kcmp() this will also return true for two fds of the same
 								         * file, created by separate open() calls. Since we use this
 								         * call mostly for filtering out duplicates in the fd store
 								         * this difference hopefully doesn't matter too much. */
 								        if (a == b)
 								                return true;
 								        /* Try to use kcmp() if we have it. */
-												tree-wide: make use of getpid_cached() wherever we can

This moves pretty much all uses of getpid() over to getpid_raw(). I
didn't specifically check whether the optimization is worth it for each
replacement, but in order to keep things simple and systematic I
switched over everything at once.

											
										
										
											2017-07-20 16:19:18 +02:00
+								        pid = getpid_cached();
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								        r = kcmp(pid, pid, KCMP_FILE, a, b);
 								        if (r == 0)
 								                return true;
 								        if (r > 0)
 								                return false;
-												fd-util: accept that kcmp might fail with EPERM/EACCES

In a container the kcmp call might well be blocked; Accept that and fall
back to fstat in that case.

											
										
										
											2018-08-28 09:32:18 +02:00
+								        if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								                return -errno;
 								        /* We don't have kcmp(), use fstat() instead. */
 								        if (fstat(a, &sta) < 0)
 								                return -errno;
 								        if (fstat(b, &stb) < 0)
 								                return -errno;
 								        if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
 								                return false;
 								        /* We consider all device fds different, since two device fds
 								         * might refer to quite different device contexts even though
 								         * they share the same inode and backing dev_t. */
 								        if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
 								                return false;
 								        if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
 								                return false;
 								        /* The fds refer to the same inode on disk, let's also check
 								         * if they have the same fd flags. This is useful to
 								         * distinguish the read and write side of a pipe created with
 								         * pipe(). */
 								        fa = fcntl(a, F_GETFL);
 								        if (fa < 0)
 								                return -errno;
 								        fb = fcntl(b, F_GETFL);
 								        if (fb < 0)
 								                return -errno;
 								        return fa == fb;
 								}
 								void cmsg_close_all(struct msghdr *mh) {
 								        struct cmsghdr *cmsg;
 								        assert(mh);
 								        CMSG_FOREACH(cmsg, mh)
 								                if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
 								                        close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
 								}
-												util-lib: move fdname_is_valid() to fd-util.[ch]

											
										
										
											2015-10-27 14:57:44 +01:00
 								bool fdname_is_valid(const char *s) {
 								        size_t len;
 								        /* Checks whether s is acceptable as a name in $LISTEN_FDNAMES. Every character has to be
 								         * ASCII and must not be a control character. In addition ":" is refused, because it
 								         * serves as the field separator in $LISTEN_FDNAMES.
 								         *
 								         * The empty string is deliberately accepted, NULL is not. Names may be at most 255
 								         * characters long. */
 								        if (!s)
 								                return false;
 								        for (len = 0; s[len] != '\0'; len++) {
 								                char c = s[len];
 								                /* Reject control characters, DEL and everything beyond ASCII, and the separator */
 								                if (c < ' ' || c >= 127 || c == ':')
 								                        return false;
 								        }
 								        return len < 256;
 								}
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
 								/* Resolves what fd refers to by reading the /proc/self/fd/<fd> symlink through readlink_malloc(), which
 								 * stores its result in ret. The return value is whatever readlink_malloc() returned, except that
 								 * -ENOENT is translated: -EOPNOTSUPP if /proc/self/fd/ itself does not exist, the negated access()
 								 * errno if that check fails for another reason, and -EBADF if the directory exists and hence only
 								 * the fd is missing. */
 								int fd_get_path(int fd, char **ret) {
-												fd-util: optimize fd_get_path() a bit

journald calls fd_get_path() a lot (it probably shouldn't, there's some
room for improvement there, but I'll leave that for another time), hence
it's worth optimizing the call a bit, in particular as it's easy.

Previously we'd open the dir /proc/self/fd/ first, before reading the
symlink inside it. This means the whole function requires three system
calls: open(), readlinkat(), close(). The reason for doing it this way
is to distinguish the case when we see ENOENT because /proc is not
mounted and the case when the fd doesn't exist.

With this change we'll directly go for the readlink(), and only if that
fails do an access() to see if /proc is mounted at all.

This optimizes the common case (where the fd is valid and /proc
mounted), in favour of the uncommon case (where the fd doesn#t exist or
/proc is not mounted).

											
										
										
											2018-10-25 21:27:00 +02:00
+								        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
-												journal: when creating a new journal file, fsync() the directory it is created in too

Fixes: #2831

											
										
										
											2016-04-26 15:47:55 +02:00
+								        int r;
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
-												fd-util: optimize fd_get_path() a bit

journald calls fd_get_path() a lot (it probably shouldn't, there's some
room for improvement there, but I'll leave that for another time), hence
it's worth optimizing the call a bit, in particular as it's easy.

Previously we'd open the dir /proc/self/fd/ first, before reading the
symlink inside it. This means the whole function requires three system
calls: open(), readlinkat(), close(). The reason for doing it this way
is to distinguish the case when we see ENOENT because /proc is not
mounted and the case when the fd doesn't exist.

With this change we'll directly go for the readlink(), and only if that
fails do an access() to see if /proc is mounted at all.

This optimizes the common case (where the fd is valid and /proc
mounted), in favour of the uncommon case (where the fd doesn#t exist or
/proc is not mounted).

											
										
										
											2018-10-25 21:27:00 +02:00
+								        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
 								        r = readlink_malloc(procfs_path, ret);
 								        if (r == -ENOENT) {
 								                /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make
-												codespell: fix spelling errors

											
										
										
											2019-04-27 02:22:40 +02:00
+								                 * things debuggable and distinguish the two. */
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
-												fd-util: optimize fd_get_path() a bit

journald calls fd_get_path() a lot (it probably shouldn't, there's some
room for improvement there, but I'll leave that for another time), hence
it's worth optimizing the call a bit, in particular as it's easy.

Previously we'd open the dir /proc/self/fd/ first, before reading the
symlink inside it. This means the whole function requires three system
calls: open(), readlinkat(), close(). The reason for doing it this way
is to distinguish the case when we see ENOENT because /proc is not
mounted and the case when the fd doesn't exist.

With this change we'll directly go for the readlink(), and only if that
fails do an access() to see if /proc is mounted at all.

This optimizes the common case (where the fd is valid and /proc
mounted), in favour of the uncommon case (where the fd doesn#t exist or
/proc is not mounted).

											
										
										
											2018-10-25 21:27:00 +02:00
+								                if (access("/proc/self/fd/", F_OK) < 0)
 								                        /* /proc is not available or not set up properly, we're most likely in some chroot
 								                         * environment. */
 								                        return errno == ENOENT ? -EOPNOTSUPP : -errno;
-												journal: when creating a new journal file, fsync() the directory it is created in too

Fixes: #2831

											
										
										
											2016-04-26 15:47:55 +02:00
-												fd-util: optimize fd_get_path() a bit

journald calls fd_get_path() a lot (it probably shouldn't, there's some
room for improvement there, but I'll leave that for another time), hence
it's worth optimizing the call a bit, in particular as it's easy.

Previously we'd open the dir /proc/self/fd/ first, before reading the
symlink inside it. This means the whole function requires three system
calls: open(), readlinkat(), close(). The reason for doing it this way
is to distinguish the case when we see ENOENT because /proc is not
mounted and the case when the fd doesn't exist.

With this change we'll directly go for the readlink(), and only if that
fails do an access() to see if /proc is mounted at all.

This optimizes the common case (where the fd is valid and /proc
mounted), in favour of the uncommon case (where the fd doesn#t exist or
/proc is not mounted).

											
										
										
											2018-10-25 21:27:00 +02:00
+								                return -EBADF; /* The directory exists, hence it's the fd that doesn't. */
 								        }
-												journal: when creating a new journal file, fsync() the directory it is created in too

Fixes: #2831

											
										
										
											2016-04-26 15:47:55 +02:00
 								        return r;
-												nspawn: when readjusting UID/GID ownership of OS trees, skip read-only subtrees

This should allow tools like rkt to pre-mount read-only subtrees in the OS
tree, without breaking the patching code.

Note that the code will still fail, if the top-level directory is already
read-only.

											
										
										
											2016-04-25 12:48:05 +02:00
+								}
-												fd-util: add new helper move_fd() and make use of it

We are using the same pattern at various places: call dup2() on an fd,
and close the old fd, usually in combination with some O_CLOEXEC
fiddling. Let's add a little helper for this, and port a few obvious
cases over.

											
										
										
											2017-10-26 18:45:54 +02:00
 								int move_fd(int from, int to, int cloexec) {
 								        int want_cloexec = cloexec, k;
 								        /* Relocates the fd 'from' to the fd number 'to' and releases 'from' afterwards. The 'cloexec'
 								         * argument controls FD_CLOEXEC on the result: a negative value keeps whatever 'from' had, 0 turns
 								         * the flag off, and a positive value turns it on. Returns 'to' on success, a negative errno-style
 								         * value otherwise. */
 								        if (from < 0 || to < 0)
 								                return -EBADF;
 								        if (from == to) {
 								                /* Nothing to move, at most the close-on-exec flag needs adjusting */
 								                if (want_cloexec < 0)
 								                        return to;
 								                k = fd_cloexec(to, want_cloexec);
 								                return k < 0 ? k : to;
 								        }
 								        if (want_cloexec < 0) {
 								                /* Inherit the flag from the source fd */
 								                int fdflags = fcntl(from, F_GETFD, 0);
 								                if (fdflags < 0)
 								                        return -errno;
 								                want_cloexec = (fdflags & FD_CLOEXEC) != 0;
 								        }
 								        k = dup3(from, to, want_cloexec ? O_CLOEXEC : 0);
 								        if (k < 0)
 								                return -errno;
 								        assert(k == to);
 								        safe_close(from);
 								        return to;
 								}
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
 								/* Returns a new fd from which the size bytes at data can be read back, or a negative errno-style value
 								 * on failure. data may only be NULL if size is 0 (asserted). The flags argument restricts which
 								 * backends are tried: ACQUIRE_NO_DEV_NULL, ACQUIRE_NO_MEMFD, ACQUIRE_NO_PIPE, ACQUIRE_NO_TMPFILE and
 								 * ACQUIRE_NO_REGULAR each switch off one of the strategies described in the body. Besides errors
 								 * passed through from system calls and helpers, -EIO is returned on a short write, -E2BIG when size
 								 * does not fit into an int in the pipe path, and -EOPNOTSUPP when control reaches the end with the
 								 * regular file fallback switched off. */
 								int acquire_data_fd(const void *data, size_t size, unsigned flags) {
 								        _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
 								        char pattern[] = "/dev/shm/data-fd-XXXXXX";
 								        _cleanup_close_ int fd = -1;
 								        int isz = 0, r;
 								        ssize_t n;
 								        off_t f;
 								        assert(data || size == 0);
 								        /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
 								         * complex than I wish it was. But here's why:
 								         *
 								         * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
 								         *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
 								         *
 								         * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
 								         *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
 								         *    clients can only bump their size to a system-wide limit, which might be quite low.
 								         *
 								         * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
 								         *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
 								         *    /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
 								         *
 								         * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
 								         *
 								         * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
 								         * figure. */
 								        if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
 								                /* As a special case, return /dev/null if we have been called for an empty data block */
 								                r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 								                if (r < 0)
 								                        return -errno;
 								                return r;
 								        }
 								        if ((flags & ACQUIRE_NO_MEMFD) == 0) {
 								                fd = memfd_new("data-fd");
 								                if (fd < 0)
 								                        goto try_pipe;
 								                n = write(fd, data, size);
 								                if (n < 0)
 								                        return -errno;
 								                if ((size_t) n != size)
 								                        return -EIO;
 								                f = lseek(fd, 0, SEEK_SET);
 								                if (f != 0)
 								                        return -errno;
 								                r = memfd_set_sealed(fd);
 								                if (r < 0)
 								                        return r;
-												macro: introduce new TAKE_FD() macro

This is similar to TAKE_PTR() but operates on file descriptors, and thus
assigns -1 to the fd parameter after returning it.

Removes 60 lines from our codebase. Pretty good too I think.

											
										
										
											2018-03-22 17:04:29 +01:00
+								                return TAKE_FD(fd);
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
+								        }
 								try_pipe:
 								        if ((flags & ACQUIRE_NO_PIPE) == 0) {
 								                if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
 								                        return -errno;
 								                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
 								                if (isz < 0)
 								                        return -errno;
 								                if ((size_t) isz < size) {
 								                        isz = (int) size;
 								                        if (isz < 0 || (size_t) isz != size)
 								                                return -E2BIG;
 								                        /* Try to bump the pipe size */
 								                        (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
 								                        /* See if that worked */
 								                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
 								                        if (isz < 0)
 								                                return -errno;
 								                        if ((size_t) isz < size)
 								                                goto try_dev_shm;
 								                }
 								                n = write(pipefds[1], data, size);
 								                if (n < 0)
 								                        return -errno;
 								                if ((size_t) n != size)
 								                        return -EIO;
 								                (void) fd_nonblock(pipefds[0], false);
-												macro: introduce new TAKE_FD() macro

This is similar to TAKE_PTR() but operates on file descriptors, and thus
assigns -1 to the fd parameter after returning it.

Removes 60 lines from our codebase. Pretty good too I think.

											
										
										
											2018-03-22 17:04:29 +01:00
+								                return TAKE_FD(pipefds[0]);
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
+								        }
 								try_dev_shm:
 								        if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
 								                fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
 								                if (fd < 0)
 								                        goto try_dev_shm_without_o_tmpfile;
 								                n = write(fd, data, size);
 								                if (n < 0)
 								                        return -errno;
 								                if ((size_t) n != size)
 								                        return -EIO;
 								                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
-												fd-util: introduce fd_reopen() helper for reopening an fd

We have the same code for this in place at various locations, let's
unify that. Also, let's repurpose test-fs-util.c as a test for this new
helper cal..

											
										
										
											2018-03-26 13:25:51 +02:00
+								                return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
-												fd-util: add new acquire_data_fd() API helper

All this function does is place some data in an in-memory read-only fd,
that may be read back to get the original data back.

Doing this in a way that works everywhere, given the different kernels
we support as well as different privilege levels is surprisingly
complex.

											
										
										
											2017-10-27 10:56:42 +02:00
+								        }
 								try_dev_shm_without_o_tmpfile:
 								        if ((flags & ACQUIRE_NO_REGULAR) == 0) {
 								                fd = mkostemp_safe(pattern);
 								                if (fd < 0)
 								                        return fd;
 								                n = write(fd, data, size);
 								                if (n < 0) {
 								                        r = -errno;
 								                        goto unlink_and_return;
 								                }
 								                if ((size_t) n != size) {
 								                        r = -EIO;
 								                        goto unlink_and_return;
 								                }
 								                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
 								                r = open(pattern, O_RDONLY|O_CLOEXEC);
 								                if (r < 0)
 								                        r = -errno;
 								        unlink_and_return:
 								                (void) unlink(pattern);
 								                return r;
 								        }
 								        return -EOPNOTSUPP;
 								}
-												fd-util: move certain fds above fd #2 (#8129)

This adds some paranoia code that moves some of the fds we allocate for
longer periods of times to fds > 2 if they are allocated below this
boundary. This is a paranoid safety thing, in order to avoid that
external code might end up erroneously use our fds under the assumption
they were valid stdin/stdout/stderr. Think: some app closes
stdin/stdout/stderr and then invokes 'fprintf(stderr, …' which causes
writes on our fds.

This both adds the helper to do the moving as well as ports over a
number of users to this new logic. Since we don't want to litter all our
code with invocations of this I tried to strictly focus on fds we keep
open for long periods of times only and only in code that is frequently
loaded into foreign programs (under the assumptions that in our own
codebase we are smart enough to always keep stdin/stdout/stderr
allocated to avoid this pitfall). Specifically this means all code used
by NSS and our sd-xyz API:

1. our logging APIs
2. sd-event
3. sd-bus
4. sd-resolve
5. sd-netlink

This change was inspired by this:

https://github.com/systemd/systemd/issues/8075#issuecomment-363689755

This shows that apparently IRL there are programs that do close
stdin/stdout/stderr, and we should accommodate that.

Note that this won't fix any bugs, this just makes sure that buggy
programs are less likely to interfere with our own code.
											
										
										
											2018-02-09 17:53:28 +01:00
-												fd-util: add new helper call fd_duplicate_data_fd()

This call creates an fd from another fd containing the same data.
Specifically, repeated read() on the returned fd should return the same
data as the original fd. This call is useful when we want to copy data
out of disk images and suchlike, and want to be able to pass fds with the
data around without having to keep the disk image continuously mounted.

The implementation tries to be somewhat smart and tries to prefer
memfds/pipes over files in /tmp or /var/tmp based on the size of the
data, but has appropriate fallbacks in place.

											
										
										
											2018-03-09 22:45:08 +01:00
+								/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
 								#define DATA_FD_MEMORY_LIMIT (64U*1024U)
 								/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that use /var/tmp instead. */
 								#define DATA_FD_TMP_LIMIT (1024U*1024U)
 								int fd_duplicate_data_fd(int fd) {
 								        _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
 								        _cleanup_free_ void *remains = NULL;
 								        size_t remains_size = 0;
 								        const char *td;
 								        struct stat st;
 								        int r;
 								        /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
 								         * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
 								         * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
 								         * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
 								         * /var/tmp. */
 								        if (fstat(fd, &st) < 0)
 								                return -errno;
 								        /* For now, let's only accept regular files, sockets, pipes and char devices */
 								        if (S_ISDIR(st.st_mode))
 								                return -EISDIR;
 								        if (S_ISLNK(st.st_mode))
 								                return -ELOOP;
 								        if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
 								                return -EBADFD;
 								        /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
 								         * that we use the reported regular file size only as a hint, given that there are plenty special files in
 								         * /proc and /sys which report a zero file size but can be read from. */
 								        if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
 								                /* Try a memfd first */
 								                copy_fd = memfd_new("data-fd");
 								                if (copy_fd >= 0) {
 								                        off_t f;
 								                        r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
 								                        if (r < 0)
 								                                return r;
 								                        f = lseek(copy_fd, 0, SEEK_SET);
 								                        if (f != 0)
 								                                return -errno;
 								                        if (r == 0) {
 								                                /* Did it fit into the limit? If so, we are done. */
 								                                r = memfd_set_sealed(copy_fd);
 								                                if (r < 0)
 								                                        return r;
 								                                return TAKE_FD(copy_fd);
 								                        }
 								                        /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
 								                } else {
 								                        _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
 								                        int isz;
 								                        /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
 								                         * then block indefinitely when we hit the pipe size limit */
 								                        if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
 								                                return -errno;
 								                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
 								                        if (isz < 0)
 								                                return -errno;
 								                        /* Try to enlarge the pipe size if necessary */
 								                        if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
 								                                (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
 								                                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
 								                                if (isz < 0)
 								                                        return -errno;
 								                        }
 								                        if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
-												copy: support getting progress feedback from the various copy functions

This adds two optional functions that may be passed to the various copy
functions. One is invoked whenever we start copying a new file object,
the other while we copy file payload in each loop iteration.

When the caller passes one or both they can get notifications about copy
progress, for example to log where things are.

											
										
										
											2018-10-10 21:03:30 +02:00
+								                                r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
-												fd-util: add new helper call fd_duplicate_data_fd()

This call creates an fd from another fd containing the same data.
Specifically, repeated read() on the returned fd should return the same
data as the original fd. This call is useful when we want to copy data
out of disk images and suchlike, and want to be able to pass fds with the data
around without having to keep the disk image continuously mounted.

The implementation tries to be somewhat smart and tries to prefer
memfds/pipes over files in /tmp or /var/tmp based on the size of the
data, but has appropriate fallbacks in place.

											
										
										
											2018-03-09 22:45:08 +01:00
+								                                if (r < 0 && r != -EAGAIN)
 								                                        return r; /* If we get EAGAIN it could be because of the source or because of
 								                                                   * the destination fd, we can't know, as sendfile() and friends won't
 								                                                   * tell us. Hence, treat this as reason to fall back, just to be
 								                                                   * sure. */
 								                                if (r == 0) {
 								                                        /* Everything fit in, yay! */
 								                                        (void) fd_nonblock(pipefds[0], false);
 								                                        return TAKE_FD(pipefds[0]);
 								                                }
 								                                /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
 								                                 * when writing the new file we incorporate this first. */
 								                                copy_fd = TAKE_FD(pipefds[0]);
 								                        }
 								                }
 								        }
 								        /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
 								        if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
 								            (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
 								                off_t f;
 								                tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
 								                if (tmp_fd < 0)
 								                        return tmp_fd;
 								                if (copy_fd >= 0) {
 								                        /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
 								                         * temporary file first. */
 								                        r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
 								                        if (r < 0)
 								                                return r;
 								                        assert(r == 0);
 								                }
 								                if (remains_size > 0) {
 								                        /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
 								                         * failed copy operation, let's flush them out next. */
 								                        r = loop_write(tmp_fd, remains, remains_size, false);
 								                        if (r < 0)
 								                                return r;
 								                }
 								                r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
 								                if (r < 0)
 								                        return r;
 								                if (r == 0)
 								                        goto finish;  /* Yay, it fit in */
 								                /* It didn't fit in. Let's not forget to use what we already used */
 								                f = lseek(tmp_fd, 0, SEEK_SET);
 								                if (f != 0)
 								                        return -errno;
 								                safe_close(copy_fd);
 								                copy_fd = TAKE_FD(tmp_fd);
 								                remains = mfree(remains);
 								                remains_size = 0;
 								        }
 								        /* As last fallback use /var/tmp */
 								        r = var_tmp_dir(&td);
 								        if (r < 0)
 								                return r;
 								        tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
 								        if (tmp_fd < 0)
 								                return tmp_fd;
 								        if (copy_fd >= 0) {
 								                /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
 								                 * into the temporary file first. */
 								                r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
 								                if (r < 0)
 								                        return r;
 								                assert(r == 0);
 								        }
 								        if (remains_size > 0) {
 								                /* Then, copy in any read but not yet written bytes. */
 								                r = loop_write(tmp_fd, remains, remains_size, false);
 								                if (r < 0)
 								                        return r;
 								        }
 								        /* Copy in the rest */
 								        r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
 								        if (r < 0)
 								                return r;
 								        assert(r == 0);
 								finish:
 								        /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
 								         * file again) */
 								        return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
 								}
-												fd-util: move certain fds above fd #2 (#8129)

This adds some paranoia code that moves some of the fds we allocate for
longer periods of times to fds > 2 if they are allocated below this
boundary. This is a paranoid safety thing, in order to avoid that
external code might end up erroneously use our fds under the assumption
they were valid stdin/stdout/stderr. Think: some app closes
stdin/stdout/stderr and then invokes 'fprintf(stderr, …' which causes
writes on our fds.

This both adds the helper to do the moving as well as ports over a
number of users to this new logic. Since we don't want to litter all our
code with invocations of this I tried to strictly focus on fds we keep
open for long periods of times only and only in code that is frequently
loaded into foreign programs (under the assumptions that in our own
codebase we are smart enough to always keep stdin/stdout/stderr
allocated to avoid this pitfall). Specifically this means all code used
by NSS and our sd-xyz API:

1. our logging APIs
2. sd-event
3. sd-bus
4. sd-resolve
5. sd-netlink

This change was inspired by this:

https://github.com/systemd/systemd/issues/8075#issuecomment-363689755

This shows that apparently IRL there are programs that do close
stdin/stdout/stderr, and we should accommodate that.

Note that this won't fix any bugs, this just makes sure that buggy
programs are less likely to interfere with our own code.
											
										
										
											2018-02-09 17:53:28 +01:00
+								int fd_move_above_stdio(int fd) {
 								        int flags, copy;
 								        PROTECT_ERRNO;
 								        /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
 								         * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
 								         * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
 								         * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
 								         * stdin/stdout/stderr of unrelated code.
 								         *
 								         * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
 								         * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
 								         * been closed before.
 								         *
 								         * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
 								         * error we simply return the original file descriptor, and we do not touch errno. */
 								        if (fd < 0 || fd > 2)
 								                return fd;
 								        flags = fcntl(fd, F_GETFD, 0);
 								        if (flags < 0)
 								                return fd;
 								        if (flags & FD_CLOEXEC)
 								                copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
 								        else
 								                copy = fcntl(fd, F_DUPFD, 3);
 								        if (copy < 0)
 								                return fd;
 								        assert(copy > 2);
 								        (void) close(fd);
 								        return copy;
 								}
-												fd-util: add new call rearrange_stdio()

Quite often we need to set up a number of fds as stdin/stdout/stderr of
a process we are about to start. Add a generic implementation for a
routine doing that that takes care to do so properly:

1. Can handle the case where stdin/stdout/stderr were previously
   closed, and the fds to set as stdin/stdout/stderr are hence likely in
   the 0..2 range. Handling this properly is nasty, since we need to first
   move the fds out of this range in order to later move them back in, to
   make things fully robust.

2. Can optionally open /dev/null in case for one or more of the fds, in
   a smart way, sharing the open file if possible between multiple of
   the fds.

3. Guarantees that O_CLOEXEC is not set on the three fds, even if the fds
   already were in the 0..2 range and hence possibly weren't moved.

											
										
										
											2018-02-28 10:00:26 +01:00
 								int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
 								        /* target[k] is the fd that shall end up as fd number k (0 = stdin, 1 = stdout, 2 = stderr) */
 								        int target[3] = { original_input_fd, original_output_fd, original_error_fd };
 								        /* Temporary duplicates created below; all of them are closed again before returning */
 								        int moved[3] = { -1, -1, -1 };
 								        /* Our own /dev/null fd, if one had to be opened */
 								        int null_fd = -1;
 								        bool need_null_read, need_null_write;
 								        int r, k;
 								        /* Installs the three passed file descriptors as stdin, stdout and stderr.
 								         *
 								         * - A negative argument means: connect that stream to /dev/null.
 								         * - An argument that already is the right number (e.g. STDIN_FILENO for stdin) is left where it is,
 								         *   only its O_CLOEXEC bit is turned off.
 								         * - O_CLOEXEC ends up off on all three descriptors, as is appropriate for stdio.
 								         *
 								         * Ownership: any passed fd that is > 2 is closed, on success AND on failure. Callers must treat
 								         * their input fds as invalidated once this returns.
 								         *
 								         * Returns 0 on success, a negative errno-style value on failure. On failure stdin/stdout/stderr
 								         * may be left half set up. */
 								        need_null_read = original_input_fd < 0;
 								        need_null_write = original_output_fd < 0 || original_error_fd < 0;
 								        /* Step 1: open /dev/null once if any stream needs it, with just the access mode required. */
 								        if (need_null_read || need_null_write) {
 								                int access_mode;
 								                if (need_null_read && need_null_write)
 								                        access_mode = O_RDWR;
 								                else if (need_null_read)
 								                        access_mode = O_RDONLY;
 								                else
 								                        access_mode = O_WRONLY;
 								                /* O_CLOEXEC stays on for now; it is dropped when the fd is moved to its final position. */
 								                null_fd = open("/dev/null", access_mode | O_CLOEXEC);
 								                if (null_fd < 0) {
 								                        r = -errno;
 								                        goto finish;
 								                }
 								                /* If it landed inside 0…2 (because stdio was closed), move it out of the way. */
 								                if (null_fd < 3) {
 								                        int copy;
 								                        copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate with O_CLOEXEC set */
 								                        if (copy < 0) {
 								                                r = -errno;
 								                                goto finish;
 								                        }
 								                        safe_close(null_fd);
 								                        null_fd = copy;
 								                }
 								        }
 								        /* Step 2: work out which fd goes into each slot. */
 								        for (k = 0; k < 3; k++) {
 								                if (target[k] < 0) {
 								                        /* Negative parameter: this stream is connected to /dev/null */
 								                        target[k] = null_fd;
 								                        continue;
 								                }
 								                /* Already in its own slot, or safely above the stdio range: keep as is. */
 								                if (target[k] == k || target[k] >= 3)
 								                        continue;
 								                /* Inside 0…2 but in the wrong slot: duplicate it above the range so the slot it currently
 								                 * occupies can be overwritten safely. */
 								                moved[k] = fcntl(target[k], F_DUPFD_CLOEXEC, 3); /* Duplicate with O_CLOEXEC set */
 								                if (moved[k] < 0) {
 								                        r = -errno;
 								                        goto finish;
 								                }
 								                target[k] = moved[k];
 								        }
 								        /* Step 3: every target[k] now either equals k or is above the stdio range, so the slots can be
 								         * filled without clobbering a source fd. This is the point of no return. */
 								        for (k = 0; k < 3; k++) {
 								                if (target[k] != k) {
 								                        assert(target[k] > 2);
 								                        if (dup2(target[k], k) < 0) { /* The new fd has O_CLOEXEC turned off. */
 								                                r = -errno;
 								                                goto finish;
 								                        }
 								                        continue;
 								                }
 								                /* Already in place, just make sure O_CLOEXEC is off */
 								                r = fd_cloexec(k, false);
 								                if (r < 0)
 								                        goto finish;
 								        }
 								        r = 0;
 								finish:
 								        /* Close the caller's fds if they are above the stdio range, taking care to close an fd that was
 								         * passed for several streams only once. */
 								        safe_close_above_stdio(original_input_fd);
 								        if (original_output_fd != original_input_fd)
 								                safe_close_above_stdio(original_output_fd);
 								        if (!(original_error_fd == original_input_fd || original_error_fd == original_output_fd))
 								                safe_close_above_stdio(original_error_fd);
 								        /* Close the temporary duplicates created in step 2 */
 								        for (k = 0; k < 3; k++)
 								                safe_close(moved[k]);
 								        /* Close our /dev/null fd, if it is > 2 */
 								        safe_close_above_stdio(null_fd);
 								        return r;
 								}
-												fd-util: introduce fd_reopen() helper for reopening an fd

We have the same code for this in place at various locations, let's
unify that. Also, let's repurpose test-fs-util.c as a test for this new
helper call.

											
										
										
											2018-03-26 13:25:51 +02:00
 								int fd_reopen(int fd, int flags) {
 								        char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
 								        int reopened;
 								        /* Opens the file behind 'fd' a second time, via /proc/self/fd/, using the access flags in 'flags'.
 								         * Handy for turning an O_PATH fd into a regular one, or an O_RDWR fd into an O_RDONLY one.
 								         *
 								         * Returns the new fd on success, a negative errno-style value on failure. The passed fd is not
 								         * closed here.
 								         *
 								         * Does not work on sockets (they cannot be open()ed, ever).
 								         *
 								         * The new fd implicitly starts out with its file read index at 0. */
 								        xsprintf(path, "/proc/self/fd/%i", fd);
 								        reopened = open(path, flags);
 								        if (reopened >= 0)
 								                return reopened;
 								        if (errno != ENOENT)
 								                return -errno;
 								        /* ENOENT is ambiguous: it may mean that /proc/ is not mounted at all. Report that case as ENOSYS
 								         * (the concept is not implementable without /proc/), so that nobody mistakes the error as
 								         * applying to the object being reopened. The check is done in the error path only. */
 								        return proc_mounted() == 0 ? -ENOSYS : -ENOENT;
 								}
-												main: split out reading of /proc/sys/fs/nr_open into its own function

This doesn't really reduce the code size overall, but it does make main.c
shorter and more readable, and that's always a good thing.

											
										
										
											2018-06-05 15:21:47 +02:00
 								int read_nr_open(void) {
 								        _cleanup_free_ char *nr_open = NULL;
 								        int r;
 								        /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the
 								         * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */
 								        r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
 								        if (r < 0)
 								                log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
 								        else {
 								                int v;
 								                r = safe_atoi(nr_open, &v);
 								                if (r < 0)
 								                        log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
 								                else
 								                        return v;
 								        }
-												tree-wide: fix spelling of "fallback"

Similarly to "setup" vs. "set up", "fallback" is a noun, and "fall back"
is the verb. (This is pretty clear when we construct a sentence in the
present continous: "we are falling back" not "we are fallbacking").

											
										
										
											2020-08-20 11:23:26 +02:00
+								        /* If we fail, fall back to the hard-coded kernel limit of 1024 * 1024. */
-												main: split out reading of /proc/sys/fs/nr_open into its own function

This doesn't really reduce the code size over all, but it does make main.c
shorter and more readable, and that's always a good thing.

											
										
										
											2018-06-05 15:21:47 +02:00
+								        return 1024 * 1024;
 								}