Systemd/src/basic/fd-util.c
Lennart Poettering 7fe2903c23 fd-util: move certain fds above fd #2 (#8129)
This adds some paranoia code that moves some of the fds we allocate for
longer periods of times to fds > 2 if they are allocated below this
boundary. This is a paranoid safety thing, in order to avoid that
external code might end up erroneously use our fds under the assumption
they were valid stdin/stdout/stderr. Think: some app closes
stdin/stdout/stderr and then invokes 'fprintf(stderr, …' which causes
writes on our fds.

This both adds the helper to do the moving as well as ports over a
number of users to this new logic. Since we don't want to litter all our
code with invocations of this I tried to strictly focus on fds we keep
open for long periods of times only and only in code that is frequently
loaded into foreign programs (under the assumptions that in our own
codebase we are smart enough to always keep stdin/stdout/stderr
allocated to avoid this pitfall). Specifically this means all code used
by NSS and our sd-xyz API:

1. our logging APIs
2. sd-event
3. sd-bus
4. sd-resolve
5. sd-netlink

This changed was inspired by this:

https://github.com/systemd/systemd/issues/8075#issuecomment-363689755

This shows that apparently IRL there are programs that do close
stdin/stdout/stderr, and we should accomodate for that.

Note that this won't fix any bugs, this just makes sure that buggy
programs are less likely to interfere with out own code.
2018-02-09 17:53:28 +01:00

618 lines
18 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* SPDX-License-Identifier: LGPL-2.1+ */
/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "util.h"
int close_nointr(int fd) {
assert(fd >= 0);
if (close(fd) >= 0)
return 0;
/*
* Just ignore EINTR; a retry loop is the wrong thing to do on
* Linux.
*
* http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
* https://bugzilla.gnome.org/show_bug.cgi?id=682819
* http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
* https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
*/
if (errno == EINTR)
return 0;
return -errno;
}
int safe_close(int fd) {
/*
* Like close_nointr() but cannot fail. Guarantees errno is
* unchanged. Is a NOP with negative fds passed, and returns
* -1, so that it can be used in this syntax:
*
* fd = safe_close(fd);
*/
if (fd >= 0) {
PROTECT_ERRNO;
/* The kernel might return pretty much any error code
* via close(), but the fd will be closed anyway. The
* only condition we want to check for here is whether
* the fd was invalid at all... */
assert_se(close_nointr(fd) != -EBADF);
}
return -1;
}
void safe_close_pair(int p[]) {
assert(p);
if (p[0] == p[1]) {
/* Special case pairs which use the same fd in both
* directions... */
p[0] = p[1] = safe_close(p[0]);
return;
}
p[0] = safe_close(p[0]);
p[1] = safe_close(p[1]);
}
void close_many(const int fds[], unsigned n_fd) {
unsigned i;
assert(fds || n_fd <= 0);
for (i = 0; i < n_fd; i++)
safe_close(fds[i]);
}
int fclose_nointr(FILE *f) {
assert(f);
/* Same as close_nointr(), but for fclose() */
if (fclose(f) == 0)
return 0;
if (errno == EINTR)
return 0;
return -errno;
}
FILE* safe_fclose(FILE *f) {
/* Same as safe_close(), but for fclose() */
if (f) {
PROTECT_ERRNO;
assert_se(fclose_nointr(f) != EBADF);
}
return NULL;
}
DIR* safe_closedir(DIR *d) {
if (d) {
PROTECT_ERRNO;
assert_se(closedir(d) >= 0 || errno != EBADF);
}
return NULL;
}
int fd_nonblock(int fd, bool nonblock) {
int flags, nflags;
assert(fd >= 0);
flags = fcntl(fd, F_GETFL, 0);
if (flags < 0)
return -errno;
if (nonblock)
nflags = flags | O_NONBLOCK;
else
nflags = flags & ~O_NONBLOCK;
if (nflags == flags)
return 0;
if (fcntl(fd, F_SETFL, nflags) < 0)
return -errno;
return 0;
}
int fd_cloexec(int fd, bool cloexec) {
int flags, nflags;
assert(fd >= 0);
flags = fcntl(fd, F_GETFD, 0);
if (flags < 0)
return -errno;
if (cloexec)
nflags = flags | FD_CLOEXEC;
else
nflags = flags & ~FD_CLOEXEC;
if (nflags == flags)
return 0;
if (fcntl(fd, F_SETFD, nflags) < 0)
return -errno;
return 0;
}
void stdio_unset_cloexec(void) {
(void) fd_cloexec(STDIN_FILENO, false);
(void) fd_cloexec(STDOUT_FILENO, false);
(void) fd_cloexec(STDERR_FILENO, false);
}
_pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
unsigned i;
assert(n_fdset == 0 || fdset);
for (i = 0; i < n_fdset; i++)
if (fdset[i] == fd)
return true;
return false;
}
int close_all_fds(const int except[], unsigned n_except) {
_cleanup_closedir_ DIR *d = NULL;
struct dirent *de;
int r = 0;
assert(n_except == 0 || except);
d = opendir("/proc/self/fd");
if (!d) {
int fd;
struct rlimit rl;
/* When /proc isn't available (for example in chroots)
* the fallback is brute forcing through the fd
* table */
assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
int q;
if (fd_in_set(fd, except, n_except))
continue;
q = close_nointr(fd);
if (q < 0 && q != -EBADF && r >= 0)
r = q;
}
return r;
}
FOREACH_DIRENT(de, d, return -errno) {
int fd = -1, q;
if (safe_atoi(de->d_name, &fd) < 0)
/* Let's better ignore this, just in case */
continue;
if (fd < 3)
continue;
if (fd == dirfd(d))
continue;
if (fd_in_set(fd, except, n_except))
continue;
q = close_nointr(fd);
if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
r = q;
}
return r;
}
int same_fd(int a, int b) {
struct stat sta, stb;
pid_t pid;
int r, fa, fb;
assert(a >= 0);
assert(b >= 0);
/* Compares two file descriptors. Note that semantics are
* quite different depending on whether we have kcmp() or we
* don't. If we have kcmp() this will only return true for
* dup()ed file descriptors, but not otherwise. If we don't
* have kcmp() this will also return true for two fds of the same
* file, created by separate open() calls. Since we use this
* call mostly for filtering out duplicates in the fd store
* this difference hopefully doesn't matter too much. */
if (a == b)
return true;
/* Try to use kcmp() if we have it. */
pid = getpid_cached();
r = kcmp(pid, pid, KCMP_FILE, a, b);
if (r == 0)
return true;
if (r > 0)
return false;
if (errno != ENOSYS)
return -errno;
/* We don't have kcmp(), use fstat() instead. */
if (fstat(a, &sta) < 0)
return -errno;
if (fstat(b, &stb) < 0)
return -errno;
if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
return false;
/* We consider all device fds different, since two device fds
* might refer to quite different device contexts even though
* they share the same inode and backing dev_t. */
if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
return false;
if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
return false;
/* The fds refer to the same inode on disk, let's also check
* if they have the same fd flags. This is useful to
* distinguish the read and write side of a pipe created with
* pipe(). */
fa = fcntl(a, F_GETFL);
if (fa < 0)
return -errno;
fb = fcntl(b, F_GETFL);
if (fb < 0)
return -errno;
return fa == fb;
}
void cmsg_close_all(struct msghdr *mh) {
struct cmsghdr *cmsg;
assert(mh);
CMSG_FOREACH(cmsg, mh)
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
}
bool fdname_is_valid(const char *s) {
const char *p;
/* Validates a name for $LISTEN_FDNAMES. We basically allow
* everything ASCII that's not a control character. Also, as
* special exception the ":" character is not allowed, as we
* use that as field separator in $LISTEN_FDNAMES.
*
* Note that the empty string is explicitly allowed
* here. However, we limit the length of the names to 255
* characters. */
if (!s)
return false;
for (p = s; *p; p++) {
if (*p < ' ')
return false;
if (*p >= 127)
return false;
if (*p == ':')
return false;
}
return p - s < 256;
}
int fd_get_path(int fd, char **ret) {
char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
int r;
xsprintf(procfs_path, "/proc/self/fd/%i", fd);
r = readlink_malloc(procfs_path, ret);
if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
return -EBADF;
return r;
}
int move_fd(int from, int to, int cloexec) {
int r;
/* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
* 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
* off, if it is > 0 it is turned on. */
if (from < 0)
return -EBADF;
if (to < 0)
return -EBADF;
if (from == to) {
if (cloexec >= 0) {
r = fd_cloexec(to, cloexec);
if (r < 0)
return r;
}
return to;
}
if (cloexec < 0) {
int fl;
fl = fcntl(from, F_GETFD, 0);
if (fl < 0)
return -errno;
cloexec = !!(fl & FD_CLOEXEC);
}
r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
if (r < 0)
return -errno;
assert(r == to);
safe_close(from);
return to;
}
int acquire_data_fd(const void *data, size_t size, unsigned flags) {
char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
_cleanup_close_pair_ int pipefds[2] = { -1, -1 };
char pattern[] = "/dev/shm/data-fd-XXXXXX";
_cleanup_close_ int fd = -1;
int isz = 0, r;
ssize_t n;
off_t f;
assert(data || size == 0);
/* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
* complex than I wish it was. But here's why:
*
* a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
* read-only. Unfortunately they require kernel 3.17, and at the time of writing we still support 3.14.
*
* b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
* a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
* clients can only bump their size to a system-wide limit, which might be quite low.
*
* c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
* earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
* /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
*
* d) Finally, we try creating a regular file in /dev/shm, which we then delete.
*
* It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
* figure. */
if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
/* As a special case, return /dev/null if we have been called for an empty data block */
r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (r < 0)
return -errno;
return r;
}
if ((flags & ACQUIRE_NO_MEMFD) == 0) {
fd = memfd_new("data-fd");
if (fd < 0)
goto try_pipe;
n = write(fd, data, size);
if (n < 0)
return -errno;
if ((size_t) n != size)
return -EIO;
f = lseek(fd, 0, SEEK_SET);
if (f != 0)
return -errno;
r = memfd_set_sealed(fd);
if (r < 0)
return r;
r = fd;
fd = -1;
return r;
}
try_pipe:
if ((flags & ACQUIRE_NO_PIPE) == 0) {
if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
return -errno;
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
if ((size_t) isz < size) {
isz = (int) size;
if (isz < 0 || (size_t) isz != size)
return -E2BIG;
/* Try to bump the pipe size */
(void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
/* See if that worked */
isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
if (isz < 0)
return -errno;
if ((size_t) isz < size)
goto try_dev_shm;
}
n = write(pipefds[1], data, size);
if (n < 0)
return -errno;
if ((size_t) n != size)
return -EIO;
(void) fd_nonblock(pipefds[0], false);
r = pipefds[0];
pipefds[0] = -1;
return r;
}
try_dev_shm:
if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
if (fd < 0)
goto try_dev_shm_without_o_tmpfile;
n = write(fd, data, size);
if (n < 0)
return -errno;
if ((size_t) n != size)
return -EIO;
/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
xsprintf(procfs_path, "/proc/self/fd/%i", fd);
r = open(procfs_path, O_RDONLY|O_CLOEXEC);
if (r < 0)
return -errno;
return r;
}
try_dev_shm_without_o_tmpfile:
if ((flags & ACQUIRE_NO_REGULAR) == 0) {
fd = mkostemp_safe(pattern);
if (fd < 0)
return fd;
n = write(fd, data, size);
if (n < 0) {
r = -errno;
goto unlink_and_return;
}
if ((size_t) n != size) {
r = -EIO;
goto unlink_and_return;
}
/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
r = open(pattern, O_RDONLY|O_CLOEXEC);
if (r < 0)
r = -errno;
unlink_and_return:
(void) unlink(pattern);
return r;
}
return -EOPNOTSUPP;
}
int fd_move_above_stdio(int fd) {
int flags, copy;
PROTECT_ERRNO;
/* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
* stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
* returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
* might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
* stdin/stdout/stderr of unrelated code.
*
* Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
* buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
* been closed before.
*
* This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
* error we simply return the original file descriptor, and we do not touch errno. */
if (fd < 0 || fd > 2)
return fd;
flags = fcntl(fd, F_GETFD, 0);
if (flags < 0)
return fd;
if (flags & FD_CLOEXEC)
copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
else
copy = fcntl(fd, F_DUPFD, 3);
if (copy < 0)
return fd;
assert(copy > 2);
(void) close(fd);
return copy;
}