Systemd/src/basic/process-util.c

/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <linux/oom.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <syslog.h>
#include <unistd.h>
#if HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
#endif

#include "alloc-util.h"
#include "architecture.h"
#include "env-util.h"
#include "errno-util.h"
#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "ioprio.h"
#include "locale-util.h"
#include "log.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_sched.h"
#include "missing_syscall.h"
#include "namespace-util.h"
#include "path-util.h"
#include "process-util.h"
#include "raw-clone.h"
#include "rlimit-util.h"
#include "signal-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
#include "terminal-util.h"
#include "user-util.h"
#include "utf8.h"

/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
 * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
 *
 * This is the size of the buffer get_process_comm() allocates for the escaped process name.
 */
#define COMM_MAX_LEN 128

static int get_process_state(pid_t pid) {
        _cleanup_free_ char *line = NULL;
        const char *p;
        char state;
        int r;

        assert(pid >= 0);

        /* Shortcut: if we are enquired about our own state, we are obviously running */
        if (pid == 0 || pid == getpid_cached())
                return (unsigned char) 'R';

        p = procfs_file_alloca(pid, "stat");

        r = read_one_line_file(p, &line);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0)
                return r;

        p = strrchr(line, ')');
        if (!p)
                return -EIO;

        p++;

        if (sscanf(p, " %c", &state) != 1)
                return -EIO;

        return (unsigned char) state;
}

int get_process_comm(pid_t pid, char **ret) {
        _cleanup_free_ char *escaped = NULL, *comm = NULL;
        int r;

        assert(ret);
        assert(pid >= 0);

        if (pid == 0 || pid == getpid_cached()) {
                comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
                if (!comm)
                        return -ENOMEM;

                if (prctl(PR_GET_NAME, comm) < 0)
                        return -errno;
        } else {
                const char *p;

                p = procfs_file_alloca(pid, "comm");

                /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
                r = read_one_line_file(p, &comm);
                if (r == -ENOENT)
                        return -ESRCH;
                if (r < 0)
                        return r;
        }

        escaped = new(char, COMM_MAX_LEN);
        if (!escaped)
                return -ENOMEM;

        /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
        cellescape(escaped, COMM_MAX_LEN, comm);

        *ret = TAKE_PTR(escaped);
        return 0;
}

int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **line) {
        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *t = NULL, *ans = NULL;
        const char *p;
        int r;
        size_t k;

        /* This is supposed to be a safety guard against runaway command lines. */
        size_t max_length = sc_arg_max();

        assert(line);
        assert(pid >= 0);

        /* Retrieves a process' command line. Replaces non-utf8 bytes by the Unicode replacement character
         * (U+FFFD). If max_columns is != -1 will return a string of the specified console width at most,
         * abbreviated with an ellipsis. If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the
         * process has no command line set (the case for kernel threads), or has a command line that
         * resolves to the empty string will return the "comm" name of the process instead. This will use at
         * most _SC_ARG_MAX bytes of input data.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * PROCESS_CMDLINE_COMM_FALLBACK is not set). Returns 0 and sets *line otherwise. */

        p = procfs_file_alloca(pid, "cmdline");
        r = fopen_unlocked(p, "re", &f);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0)
                return r;

        /* We assume that each four-byte character uses one or two columns. If we ever check for combining
         * characters, this assumption will need to be adjusted.
         *
         * Only clamp the read size if 4 * max_columns + 1 can be computed without wrapping around. If it
         * would overflow (which includes max_columns == SIZE_MAX, i.e. "no limit"), the column limit is
         * larger than anything we could read anyway, and only the _SC_ARG_MAX guard applies. */
        if (max_columns <= ((size_t) -1 - 1) / 4)
                max_length = MIN(max_length, (size_t) 4 * max_columns + 1);

        /* Allocate one byte more than we read, so that there's always room for the trailing NUL, even if
         * fread() fills the buffer completely. */
        t = new(char, max_length + 1);
        if (!t)
                return -ENOMEM;

        k = fread(t, 1, max_length, f);
        if (k > 0) {
                /* Arguments are separated by NULs. Let's replace those with spaces. The very last byte is
                 * left alone: normally it is the NUL terminating the final argument. */
                for (size_t i = 0; i < k - 1; i++)
                        if (t[i] == '\0')
                                t[i] = ' ';

                t[k] = '\0'; /* Terminate explicitly; k <= max_length, hence this is within the allocation */
        } else {
                /* We only treat getting nothing as an error. We *could* also get an error after reading some
                 * data, but we ignore that case, as such an error is rather unlikely and we prefer to get
                 * some data rather than none. */
                if (ferror(f))
                        return -errno;

                if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
                        return -ENOENT;

                /* Kernel threads have no argv[] */
                _cleanup_free_ char *t2 = NULL;

                r = get_process_comm(pid, &t2);
                if (r < 0)
                        return r;

                mfree(t);
                t = strjoin("[", t2, "]");
                if (!t)
                        return -ENOMEM;
        }

        delete_trailing_chars(t, WHITESPACE);

        bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();

        ans = escape_non_printable_full(t, max_columns, eight_bit);
        if (!ans)
                return -ENOMEM;

        /* Best effort: try to shrink the allocation to what is actually needed */
        (void) str_realloc(&ans);
        *line = TAKE_PTR(ans);
        return 0;
}

/* Points the kernel's record of this process' argument area (PR_SET_MM_ARG_START/PR_SET_MM_ARG_END) at a
 * private anonymous mapping that contains 'name'. 'l' is the length of 'name' (rename_process() passes
 * strlen(name)).
 *
 * State kept across calls:
 *   can_do  - starts at -1; set to 0 on entry and to true only when a call ran to completion. Once a call
 *             has failed, it stays 0 and every later call returns 0 immediately.
 *   mm      - the mapping currently registered with the kernel (NULL initially).
 *   mm_size - the size of that mapping.
 *
 * Returns 0 on success (or when skipped because an earlier call failed), otherwise the value returned by
 * log_debug_errno() for the failure. */
static int update_argv(const char name[], size_t l) {
        static int can_do = -1;

        if (can_do == 0)
                return 0;
        can_do = false; /* We'll set it to true only if the whole process works */

        /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
         * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
         * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
         * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
         * mmap() is not. */
        if (geteuid() != 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
                                       "Skipping PR_SET_MM, as we don't have privileges.");

        static size_t mm_size = 0;
        static char *mm = NULL;
        int r;

        /* The existing mapping (if any) is too small for the name plus trailing NUL: set up a new one */
        if (mm_size < l+1) {
                size_t nn_size;
                char *nn;

                nn_size = PAGE_ALIGN(l+1);
                nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                if (nn == MAP_FAILED)
                        return log_debug_errno(errno, "mmap() failed: %m");

                /* strncpy() zero-fills the remainder; presumably nn_size >= l+1 after PAGE_ALIGN(), so the
                 * copy ends up NUL-terminated */
                strncpy(nn, name, nn_size);

                /* Now, let's tell the kernel about this new memory */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
                        /* NOTE(review): this return leaves 'nn' mapped. Since can_do stays 0 this can
                         * happen at most once per process. */
                        if (ERRNO_IS_PRIVILEGE(errno))
                                return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m");

                        /* HACK: prctl() API is kind of dumb on this point.  The existing end address may already be
                         * below the desired start address, in which case the kernel may have kicked this back due
                         * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
                         * action).  The proper solution would be to have a prctl() API that could set both start+end
                         * simultaneously, or at least let us query the existing address to anticipate this condition
                         * and respond accordingly.  For now, we can only guess at the cause of this failure and try
                         * a workaround--which will briefly expand the arg space to something potentially huge before
                         * resizing it to what we want. */
                        log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");

                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
                                r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
                                (void) munmap(nn, nn_size);
                                return r;
                        }

                        /* NOTE(review): 'nn' is not unmapped on this failure path either; at this point the
                         * end pointer set just above already refers into 'nn'. */
                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0)
                                return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
                } else {
                        /* And update the end pointer to the new end, too. If this fails, we don't really know what
                         * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
                         * and continue. */
                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
                                log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
                }

                /* The new mapping is in place, release the one used by a previous call */
                if (mm)
                        (void) munmap(mm, mm_size);

                mm = nn;
                mm_size = nn_size;
        } else {
                /* The existing mapping is large enough (mm_size >= l+1), reuse it. strncpy() zero-fills the
                 * rest, hence the result is NUL-terminated. */
                strncpy(mm, name, mm_size);

                /* Update the end pointer, continuing regardless of any failure. */
                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
        }

        can_do = true;
        return 0;
}

int rename_process(const char name[]) {
        bool fits = true;
        size_t name_len;

        /* A poor man's setproctitle(): updates the comm field, glibc's program_invocation_name, the argv
         * area the kernel maintains for us, and finally our own saved argv[]. The comm field is limited
         * to TASK_COMM_LEN - 1 characters, and the in-place updates cannot grow beyond the length of the
         * strings they overwrite, hence longer names are likely truncated.
         *
         * Returns -EINVAL for an empty name, -EPERM when not called from the main thread, 0 if the name
         * was set but truncated somewhere, and > 0 if it was set without truncation. */

        if (isempty(name))
                return -EINVAL; /* an empty name would only confuse users */

        /* We cache things without locking, and rely on PR_SET_NAME changing the process name, which only
         * holds for the main thread. */
        if (!is_main_thread())
                return -EPERM;

        name_len = strlen(name);

        /* Step 1: the comm field. PR_SET_NAME sets the name of the calling thread, and for the main thread
         * that is the process comm. */
        if (prctl(PR_SET_NAME, name) < 0)
                log_debug_errno(errno, "PR_SET_NAME failed: %m");
        if (name_len >= TASK_COMM_LEN) /* userspace process names are 15 chars at most */
                fits = false;

        /* Step 2: glibc's idea of the process name, overwritten in place. */
        if (program_invocation_name) {
                size_t room = strlen(program_invocation_name);

                strncpy(program_invocation_name, name, room);
                if (name_len > room)
                        fits = false;
        }

        /* Step 3: replace the argv[] area the kernel maintains for us. Requires privileges, but then
         * /proc/self/cmdline shows exactly the new name, without trailing zeros. */
        (void) update_argv(name, name_len);

        /* Step 4: also patch the original argv[], in case our own code still looks there. */
        if (saved_argc > 0) {
                if (saved_argv[0]) {
                        size_t room = strlen(saved_argv[0]);

                        strncpy(saved_argv[0], name, room);
                        if (name_len > room)
                                fits = false;
                }

                /* Blank out all further arguments, stopping at the first NULL entry */
                for (int i = 1; i < saved_argc && saved_argv[i]; i++)
                        memzero(saved_argv[i], strlen(saved_argv[i]));
        }

        return fits;
}

int is_kernel_thread(pid_t pid) {
        _cleanup_free_ char *line = NULL;
        unsigned long long flags;
        const char *path;
        char *cursor;
        size_t n;
        int r;

        /* Checks the PF_KTHREAD bit in the flags field of /proc/<pid>/stat. Returns > 0 if it is set, 0 if
         * not, and a negative error otherwise (-ESRCH if the stat file is missing, -EINVAL if the PID or
         * the line is malformed). */

        /* Neither PID 1 nor we ourselves can be a kernel thread */
        if (IN_SET(pid, 0, 1) || pid == getpid_cached())
                return 0;
        if (!pid_is_valid(pid))
                return -EINVAL;

        path = procfs_file_alloca(pid, "stat");
        r = read_one_line_file(path, &line);
        if (r < 0)
                return r == -ENOENT ? -ESRCH : r;

        /* Continue right after the comm field, i.e. behind the last ')' */
        cursor = strrchr(line, ')');
        if (!cursor)
                return -EINVAL;
        cursor++;

        /* The flags field is preceded by 6 other fields; step over each (separator first, then token) */
        for (unsigned field = 0; field < 6; field++) {
                n = strspn(cursor, WHITESPACE);
                if (n == 0)
                        return -EINVAL;
                cursor += n;

                n = strcspn(cursor, WHITESPACE);
                if (n == 0)
                        return -EINVAL;
                cursor += n;
        }

        /* Step over the separator in front of the flags field */
        n = strspn(cursor, WHITESPACE);
        if (n == 0)
                return -EINVAL;
        cursor += n;

        /* Cut the line off right behind the flags field */
        n = strcspn(cursor, WHITESPACE);
        if (n == 0)
                return -EINVAL;
        cursor[n] = 0;

        r = safe_atollu(cursor, &flags);
        if (r < 0)
                return r;

        return (flags & PF_KTHREAD) ? 1 : 0;
}

int get_process_capeff(pid_t pid, char **capeff) {
        const char *status_path;
        int r;

        /* Looks up the "CapEff" field in /proc/<pid>/status and hands it to the caller via *capeff.
         * -ENOENT from the lookup is reported as -ESRCH, anything else is passed through as is. */

        assert(capeff);
        assert(pid >= 0);

        status_path = procfs_file_alloca(pid, "status");

        r = get_proc_field(status_path, "CapEff", WHITESPACE, capeff);
        return r == -ENOENT ? -ESRCH : r;
}

static int get_process_link_contents(const char *proc_file, char **name) {
        int r;

        /* Reads the target of the symlink proc_file into a newly allocated string stored in *name.
         * Returns 0 on success, -ESRCH if the link does not exist, or another negative error. */

        assert(proc_file);
        assert(name);

        r = readlink_malloc(proc_file, name);
        if (r >= 0)
                return 0;

        return r == -ENOENT ? -ESRCH : r;
}

int get_process_exe(pid_t pid, char **name) {
        const char *link_path;
        char *suffix;
        int r;

        /* Resolves /proc/<pid>/exe into *name. A trailing " (deleted)" marker is cut off. Returns 0 on
         * success, or the negative error of get_process_link_contents(). */

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "exe");
        r = get_process_link_contents(link_path, name);
        if (r < 0)
                return r;

        suffix = endswith(*name, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}

static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
        _cleanup_fclose_ FILE *f = NULL;
        const char *path;
        int r;

        /* Scans /proc/<pid>/status for the first line starting with 'field' and parses the first
         * whitespace-delimited value following it via parse_uid(). Returns -EINVAL for negative PIDs,
         * -ESRCH if the status file is missing, and -EIO if no line matches. */

        assert(field);
        assert(uid);

        if (pid < 0)
                return -EINVAL;

        path = procfs_file_alloca(pid, "status");
        r = fopen_unlocked(path, "re", &f);
        if (r < 0)
                return r == -ENOENT ? -ESRCH : r;

        for (;;) {
                _cleanup_free_ char *line = NULL;
                char *value;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return r;
                if (r == 0) /* end of file, field not found */
                        break;

                value = strstrip(line);
                if (!startswith(value, field))
                        continue;

                /* Step over the field name and the whitespace behind it, then keep only the first token */
                value += strlen(field);
                value += strspn(value, WHITESPACE);
                value[strcspn(value, WHITESPACE)] = 0;

                return parse_uid(value, uid);
        }

        return -EIO;
}

int get_process_uid(pid_t pid, uid_t *uid) {

        /* Stores the UID of the given process in *uid. Other processes are looked up via the "Uid:" line
         * of their status file; for ourselves getuid() is asked directly. */
        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Uid:", uid);

        *uid = getuid();
        return 0;
}

int get_process_gid(pid_t pid, gid_t *gid) {

        /* Stores the GID of the given process in *gid. Other processes are looked up via the "Gid:" line
         * of their status file; for ourselves getgid() is asked directly. */

        /* get_process_id() takes a uid_t pointer, make sure gid_t has the same size */
        assert_cc(sizeof(uid_t) == sizeof(gid_t));

        if (pid != 0 && pid != getpid_cached())
                return get_process_id(pid, "Gid:", gid);

        *gid = getgid();
        return 0;
}

int get_process_cwd(pid_t pid, char **cwd) {
        const char *link_path;

        /* Determines the working directory of a process: via safe_getcwd() for ourselves, via the
         * /proc/<pid>/cwd symlink for everybody else. */

        assert(pid >= 0);

        if (pid != 0 && pid != getpid_cached()) {
                link_path = procfs_file_alloca(pid, "cwd");
                return get_process_link_contents(link_path, cwd);
        }

        return safe_getcwd(cwd);
}

int get_process_root(pid_t pid, char **root) {
        const char *link_path;

        /* Resolves the /proc/<pid>/root symlink of the process into *root. */

        assert(pid >= 0);

        link_path = procfs_file_alloca(pid, "root");
        return get_process_link_contents(link_path, root);
}

/* Upper bound (5 MiB) on how much environment block data get_process_environ() and getenv_for_pid()
 * process before giving up with -ENOBUFS. */
#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)

int get_process_environ(pid_t pid, char **env) {
        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *outcome = NULL;
        size_t allocated = 0, sz = 0;
        const char *p;
        int r;

        assert(pid >= 0);
        assert(env);

        p = procfs_file_alloca(pid, "environ");

        r = fopen_unlocked(p, "re", &f);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0)
                return r;

        for (;;) {
                char c;

                if (sz >= ENVIRONMENT_BLOCK_MAX)
                        return -ENOBUFS;

                if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
                        return -ENOMEM;

                r = safe_fgetc(f, &c);
                if (r < 0)
                        return r;
                if (r == 0)
                        break;

                if (c == '\0')
                        outcome[sz++] = '\n';
                else
                        sz += cescape_char(c, outcome + sz);
        }

        outcome[sz] = '\0';
        *env = TAKE_PTR(outcome);

        return 0;
}

int get_process_ppid(pid_t pid, pid_t *_ppid) {
        int r;
        _cleanup_free_ char *line = NULL;
        long unsigned ppid;
        const char *p;

        assert(pid >= 0);
        assert(_ppid);

        if (pid == 0 || pid == getpid_cached()) {
                *_ppid = getppid();
                return 0;
        }

        p = procfs_file_alloca(pid, "stat");
        r = read_one_line_file(p, &line);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0)
                return r;

        /* Let's skip the pid and comm fields. The latter is enclosed
         * in () but does not escape any () in its value, so let's
         * skip over it manually */

        p = strrchr(line, ')');
        if (!p)
                return -EIO;

        p++;

        if (sscanf(p, " "
                   "%*c "  /* state */
                   "%lu ", /* ppid */
                   &ppid) != 1)
                return -EIO;

        if ((long unsigned) (pid_t) ppid != ppid)
                return -ERANGE;

        *_ppid = (pid_t) ppid;

        return 0;
}

int get_process_umask(pid_t pid, mode_t *umask) {
        _cleanup_free_ char *m = NULL;
        const char *p;
        int r;

        /* Reads the "Umask" field from /proc/<pid>/status and parses it into *umask. Returns -ESRCH if the
         * lookup reports -ENOENT, any other negative error of the lookup as is, and otherwise the result
         * of parse_mode(). */

        assert(umask);
        assert(pid >= 0);

        p = procfs_file_alloca(pid, "status");

        r = get_proc_field(p, "Umask", WHITESPACE, &m);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0) /* On any other failure 'm' was not set, hence don't try to parse it */
                return r;

        return parse_mode(m, umask);
}

int wait_for_terminate(pid_t pid, siginfo_t *status) {
        siginfo_t scratch;

        /* Waits until the specified child has exited, retrying when interrupted by a signal. The result
         * of waitid() is stored in *status if that is non-NULL. Returns 0 on success, otherwise the value
         * of negative_errno(). */

        assert(pid >= 1);

        if (!status)
                status = &scratch;

        do {
                zero(*status);

                if (waitid(P_PID, pid, status, WEXITED) >= 0)
                        return 0;

        } while (errno == EINTR);

        return negative_errno();
}

/*
 * Return values:
 * < 0 : wait_for_terminate() failed to get the state of the
 *       process, the process was terminated by a signal, or
 *       failed for an unknown reason.
 * >=0 : The process terminated normally, and its exit code is
 *       returned.
 *
 * That is, success is indicated by a return value of zero, and an
 * error is indicated by a non-zero value.
 *
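 * Abnormal termination is logged at LOG_ERR if WAIT_LOG_ABNORMAL is set
 * in flags, and a non-zero exit status is logged at LOG_ERR if
 * WAIT_LOG_NON_ZERO_EXIT_STATUS is set; without the respective flag the
 * message is logged at LOG_DEBUG only.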
 */
int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
        _cleanup_free_ char *buffer = NULL;
        siginfo_t status;
        int r, prio;

        /* Waits for the child to exit and logs about the outcome. 'name' is only used for log messages;
         * if it is NULL we try to use the comm name of the process instead. Returns the exit status (>= 0)
         * if the process exited normally, -EPROTO if it was killed by a signal or ended in an unknown way,
         * or the error of wait_for_terminate(). */

        assert(pid > 1);

        if (!name) {
                r = get_process_comm(pid, &buffer);
                if (r < 0)
                        /* 'name' remains NULL in this case, hence every use below goes through strna() */
                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
                else
                        name = buffer;
        }

        /* Log level used for wait failures and abnormal termination */
        prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;

        r = wait_for_terminate(pid, &status);
        if (r < 0)
                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));

        if (status.si_code == CLD_EXITED) {
                if (status.si_status != EXIT_SUCCESS)
                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
                                 "%s failed with exit status %i.", strna(name), status.si_status);
                else
                        log_debug("%s succeeded.", strna(name));

                return status.si_status;

        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {

                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
                return -EPROTO;
        }

        log_full(prio, "%s failed due to unknown reason.", strna(name));
        return -EPROTO;
}

/*
 * Return values:
 *
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
 *       was terminated by a signal, or failed for an unknown reason.
 *
 * >=0 : The process terminated normally with no failures.
 *
 * Success is indicated by a return value of zero, a timeout reported by sigtimedwait() is indicated by
 * -ETIMEDOUT, and all other failure states are indicated by a different negative errno-style value.
 *
 * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
 * to remain entirely race-free.
 */
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
        sigset_t mask;
        usec_t deadline;
        int r;

        assert_se(sigemptyset(&mask) == 0);
        assert_se(sigaddset(&mask, SIGCHLD) == 0);

        /* Wait for SIGCHLD with sigtimedwait() until the deadline passes, checking after each wake-up
         * whether the process we are interested in has exited. */
        deadline = now(CLOCK_MONOTONIC) + timeout;

        for (;;) {
                siginfo_t status = {};
                struct timespec ts;
                usec_t cur;

                cur = now(CLOCK_MONOTONIC);
                if (cur >= deadline)
                        break;

                /* Remember the outcome now, it is only evaluated after we looked at the child */
                r = sigtimedwait(&mask, NULL, timespec_store(&ts, deadline - cur)) < 0 ? -errno : 0;

                /* Let's assume the child exiting is what woke us up, and check without blocking */
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0 && status.si_pid == pid) {
                        /* It is our child. Only a regular exit with status 0 counts as success. */
                        if (status.si_code == CLD_EXITED && status.si_status == 0)
                                return 0;

                        return -EPROTO;
                }

                /* Not our child. Retry unless sigtimedwait() reported a real problem. */
                if (r >= 0 || r == -EINTR) /* woken by SIGCHLD, or interrupted by some other signal */
                        continue;

                if (r == -EAGAIN) /* timed out, the child is likely hung */
                        return -ETIMEDOUT;

                return r; /* anything unexpected is propagated */
        }

        return -EPROTO;
}

void sigkill_wait(pid_t pid) {
        /* Sends SIGKILL to the process and, if that worked, waits for it to terminate. */

        assert(pid > 1);

        if (kill(pid, SIGKILL) < 0)
                return;

        (void) wait_for_terminate(pid, NULL);
}

void sigkill_waitp(pid_t *pid) {
        PROTECT_ERRNO;

        /* Pointer flavour of sigkill_wait(): does nothing for a NULL pointer or a PID <= 1. */
        if (pid && *pid > 1)
                sigkill_wait(*pid);
}

void sigterm_wait(pid_t pid) {
        /* Sends SIGTERM (followed by SIGCONT) to the process and, if that worked, waits for it to
         * terminate. */

        assert(pid > 1);

        if (kill_and_sigcont(pid, SIGTERM) < 0)
                return;

        (void) wait_for_terminate(pid, NULL);
}

int kill_and_sigcont(pid_t pid, int sig) {
        /* Sends the signal to the process. Returns 0 on success, -errno if kill() failed. */

        if (kill(pid, sig) < 0)
                return -errno;

        /* The signal went out. Follow up with SIGCONT, except when that is what we just sent, or when we
         * sent SIGKILL, which is not affected by the process being suspended anyway. */
        if (!IN_SET(sig, SIGCONT, SIGKILL))
                (void) kill(pid, SIGCONT);

        return 0;
}

int getenv_for_pid(pid_t pid, const char *field, char **ret) {
        _cleanup_fclose_ FILE *f = NULL;
        char *value = NULL;
        const char *path;
        size_t l, sum = 0;
        int r;

        assert(pid >= 0);
        assert(field);
        assert(ret);

        if (pid == 0 || pid == getpid_cached()) {
                const char *e;

                e = getenv(field);
                if (!e) {
                        *ret = NULL;
                        return 0;
                }

                value = strdup(e);
                if (!value)
                        return -ENOMEM;

                *ret = value;
                return 1;
        }

        if (!pid_is_valid(pid))
                return -EINVAL;

        path = procfs_file_alloca(pid, "environ");

        r = fopen_unlocked(path, "re", &f);
        if (r == -ENOENT)
                return -ESRCH;
        if (r < 0)
                return r;

        l = strlen(field);
        for (;;) {
                _cleanup_free_ char *line = NULL;

                if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
                        return -ENOBUFS;

                r = read_nul_string(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return r;
                if (r == 0)  /* EOF */
                        break;

                sum += r;

                if (strneq(line, field, l) && line[l] == '=') {
                        value = strdup(line + l + 1);
                        if (!value)
                                return -ENOMEM;

                        *ret = value;
                        return 1;
                }
        }

        *ret = NULL;
        return 0;
}

int pid_is_my_child(pid_t pid) {
        pid_t parent;
        int r;

        /* Returns > 0 if the parent of the specified process is us, 0 if not (or if pid <= 1), and a
         * negative error if the parent could not be determined. */

        if (pid <= 1)
                return false;

        r = get_process_ppid(pid, &parent);
        return r < 0 ? r : parent == getpid_cached();
}

bool pid_is_unwaited(pid_t pid) {
        /* Tells whether the PID still refers to a process at all, zombies included */

        if (pid < 0)
                return false;

        /* Were we or PID 1 dead and already waited for, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Probe with the null signal; only ESRCH means the process is gone */
        return kill(pid, 0) >= 0 || errno != ESRCH;
}

bool pid_is_alive(pid_t pid) {
        int state;

        /* Tells whether the PID refers to an existing process that is not a zombie */

        if (pid < 0)
                return false;

        /* Were we or PID 1 a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Only "no such process" and the zombie state count as not alive; any other outcome, including
         * other errors, is treated as alive. */
        state = get_process_state(pid);
        return !IN_SET(state, -ESRCH, 'Z');
}

int pid_from_same_root_fs(pid_t pid) {
        const char *their_root;

        /* Compares /proc/<pid>/root with /proc/1/root via files_same() and returns its result. Negative
         * PIDs yield false; pid 0 and our own PID yield true without any comparison. */

        if (pid < 0)
                return false;

        if (pid == 0 || pid == getpid_cached())
                return true;

        their_root = procfs_file_alloca(pid, "root");
        return files_same(their_root, "/proc/1/root", 0);
}

bool is_main_thread(void) {
        /* Per-thread cache: 0 = not determined yet, 1 = main thread, -1 = some other thread */
        static thread_local int cached = 0;

        if (_unlikely_(cached == 0)) {
                /* The main thread is the one whose TID equals the PID */
                if (getpid_cached() == gettid())
                        cached = 1;
                else
                        cached = -1;
        }

        return cached > 0;
}

_noreturn_ void freeze(void) {

        /* Shuts down logging, closes all file descriptors, syncs, and then never returns. */

        log_close();

        /* Close everything, so that nobody keeps waiting for us on a socket */
        (void) close_all_fds(NULL, 0);

        sync();

        /* Rather than freezing right away, keep reaping zombies for as long as waitid() works. */
        for (;;) {
                siginfo_t si = {};

                if (waitid(P_ALL, 0, &si, WEXITED) < 0 && errno != EINTR)
                        break;
        }

        /* waitid() failed with an unexpected error, so things are really broken. Freeze for good. */
        for (;;)
                pause();
}

bool oom_score_adjust_is_valid(int oa) {
        /* Valid values lie within [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX], both ends included */
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}

unsigned long personality_from_string(const char *p) {
        int arch;

        /* Parses a personality specifier. We use our own identifiers, which name specific ABIs instead of
         * merely hinting at the register size, so that several locally supported ABIs of the same register
         * size remain possible. Returns PERSONALITY_INVALID for NULL, unknown or unsupported input. */

        if (!p)
                return PERSONALITY_INVALID;

        arch = architecture_from_string(p);
        if (arch < 0)
                return PERSONALITY_INVALID;

        /* The native architecture maps to PER_LINUX, the secondary one (if there is one) to PER_LINUX32 */
        if (arch == native_architecture())
                return PER_LINUX;
#ifdef SECONDARY_ARCHITECTURE
        if (arch == SECONDARY_ARCHITECTURE)
                return PER_LINUX32;
#endif

        return PERSONALITY_INVALID;
}

const char* personality_to_string(unsigned long p) {
        int arch;

        /* Inverse of personality_from_string(): maps PER_LINUX to the name of the native architecture and
         * PER_LINUX32 to the name of the secondary architecture (if one is defined at build time). Returns
         * NULL for every other personality. */

        if (p == PER_LINUX)
                arch = native_architecture();
#ifdef SECONDARY_ARCHITECTURE
        else if (p == PER_LINUX32)
                arch = SECONDARY_ARCHITECTURE;
#endif
        else
                arch = _ARCHITECTURE_INVALID;

        return arch < 0 ? NULL : architecture_to_string(arch);
}

int safe_personality(unsigned long p) {
        int result;

        /* glibc's personality() reports failure inconsistently: sometimes through errno, sometimes as a
         * negative return value that carries an errno-like code. This wrapper normalizes that. If errno
         * was set, it wins and is returned negated. Otherwise the negative return value is used and
         * mirrored into errno. Either way, on failure both the return value and errno describe the same
         * problem afterwards. On success the value returned by personality() is passed through.
         *
         * See https://github.com/systemd/systemd/issues/6737 */

        errno = 0;
        result = personality(p);
        if (result >= 0)
                return result;

        if (errno != 0)
                return -errno;

        errno = -result;
        return result;
}

int opinionated_personality(unsigned long *ret) {
        int persona;

        /* Queries the current personality and stores a simplified version of it in *ret. This is
         * deliberately opinionated: all finer-grained bits and exotic personalities are ignored, and only
         * the two most relevant ones are told apart, so *ret is either PER_LINUX32 or PER_LINUX.
         *
         * Returns 0 on success, or the negative error from safe_personality() if the query failed (in
         * which case *ret is left untouched). */

        persona = safe_personality(PERSONALITY_INVALID);
        if (persona < 0)
                return persona;

        *ret = (((unsigned long) persona & 0xffff) == PER_LINUX32) ? PER_LINUX32 : PER_LINUX;
        return 0;
}

void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only does something when we are PID 1 and run under valgrind: forks off a child that exits
         * successfully right away, and waits for it. Without valgrind support compiled in this is a
         * no-op. */

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        helper = raw_clone(SIGCHLD);
        if (helper == 0)
                exit(EXIT_SUCCESS);

        if (helper < 0) {
                log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
                return;
        }

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}

int pid_compare_func(const pid_t *a, const pid_t *b) {
        pid_t lhs = *a, rhs = *b;

        /* Comparator over two PIDs, in the shape qsort() expects */
        return CMP(lhs, rhs);
}

int ioprio_parse_priority(const char *s, int *ret) {
        int parsed, r;

        /* Parses an I/O priority from its decimal string form. On success stores the value in *ret and
         * returns 0. Returns the error from safe_atoi() if the string is not a valid integer, or -EINVAL
         * if ioprio_priority_is_valid() rejects the number. *ret is only written on success. */

        assert(s);
        assert(ret);

        r = safe_atoi(s, &parsed);
        if (r < 0)
                return r;

        if (ioprio_priority_is_valid(parsed)) {
                *ret = parsed;
                return 0;
        }

        return -EINVAL;
}

/* The cached PID, possible values:
 *
 *     == UNSET [0]  → cache not initialized yet
 *     == BUSY [-1]  → some thread is initializing it at the moment
 *     any other     → the cached PID
 *
 * The variable is read and claimed by getpid_cached() with an atomic compare-and-swap, and reset by
 * reset_cached_pid().
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

/* Process-wide cache slot, starts out uninitialized */
static pid_t cached_pid = CACHED_PID_UNSET;

/* Invalidates the PID cache, so that the next getpid_cached() call queries the PID afresh. getpid_cached()
 * registers this as the at-fork "child" handler. */
void reset_cached_pid(void) {
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
        cached_pid = CACHED_PID_UNSET;
}

/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
 * libpthread, as it is part of glibc anyway. */
extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
extern void* __dso_handle _weak_;

/* Returns the PID of the calling process, served from a process-wide cache whenever possible. */
pid_t getpid_cached(void) {
        /* Whether the at-fork handler that resets the cache has been registered already */
        static bool installed = false;
        pid_t current_value;

        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
         *
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
         */

        /* Atomically claim the cache if it is still UNSET (marking it BUSY). In every case we get back the
         * value the cache held before the operation, which tells us which of the three states we are in. */
        current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);

        switch (current_value) {

        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
                pid_t new_pid;

                /* Only the caller whose compare-and-swap moved the cache from UNSET to BUSY gets here */
                new_pid = raw_getpid();

                if (!installed) {
                        /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
                         * only half-documented (glibc doesn't document it but LSB does — though only superficially)
                         * we'll check for errors only in the most generic fashion possible. */

                        if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
                                /* OOM? Let's try again later */
                                /* Without the handler a cached value could go stale in a forked child, hence
                                 * release the cache again and return the freshly queried PID uncached. */
                                cached_pid = CACHED_PID_UNSET;
                                return new_pid;
                        }

                        installed = true;
                }

                /* Publish the PID; this also ends the BUSY state */
                cached_pid = new_pid;
                return new_pid;
        }

        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
                /* Don't wait for the initializer, just ask the kernel directly this one time */
                return raw_getpid();

        default: /* Properly initialized */
                return current_value;
        }
}

int must_be_root(void) {

        /* Returns 0 if our effective UID is 0. Otherwise logs an error with a synthetic EPERM and returns
         * whatever log_error_errno() returns for it. */

        if (geteuid() != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");

        return 0;
}

/* Cleanup handler: if *ssp points to a saved signal mask, reinstate it; a NULL *ssp means there is nothing
 * to restore. Failures of sigprocmask() are ignored. */
static void restore_sigsetp(sigset_t **ssp) {
        sigset_t *saved = *ssp;

        if (!saved)
                return;

        (void) sigprocmask(SIG_SETMASK, saved, NULL);
}

int safe_fork_full(
                const char *name,
                const int except_fds[],
                size_t n_except_fds,
                ForkFlags flags,
                pid_t *ret_pid) {

        pid_t original_pid, pid;
        sigset_t saved_ss, ss;
        _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
        bool block_signals = false, block_all = false;
        int prio, r, deathsig = 0;

        /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
         * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent.
         *
         * Parameters:
         *   name         – if non-NULL, the child renames itself to this (failure to rename is logged and
         *                  ignored); also used in log messages
         *   except_fds   – n_except_fds file descriptors handed to close_all_fds() when FORK_CLOSE_ALL_FDS
         *                  is set
         *   flags        – combination of FORK_xyz flags selecting the initializations to do
         *   ret_pid      – if non-NULL, receives the child's PID, in both parent and child
         *
         * Returns a negative errno-style value if setting the signal mask or forking fails. With FORK_WAIT
         * the parent waits for the child, propagates errors from waiting, and returns -EPROTO if the child
         * exited with a non-zero status; *ret_pid is not written in these failure cases. Any failure in
         * the child after the fork terminates the child with _exit(EXIT_FAILURE). */

        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;

        /* Figure out the parent-death signal once, so that the signal blocking below, the prctl() call and
         * the "parent died early" emulation all agree on it. FORK_DEATHSIG_SIGINT takes precedence if both
         * flags are set, matching what we configure via PR_SET_PDEATHSIG. */
        if (flags & FORK_DEATHSIG_SIGINT)
                deathsig = SIGINT;
        else if (flags & FORK_DEATHSIG)
                deathsig = SIGTERM;

        original_pid = getpid_cached();

        if ((flags & FORK_RESET_SIGNALS) || deathsig != 0) {
                /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
                 * be sure that signals we might send to the child (such as the death signal) are not lost. */

                assert_se(sigfillset(&ss) >= 0);
                block_signals = block_all = true;

        } else if (flags & FORK_WAIT) {
                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */

                assert_se(sigemptyset(&ss) >= 0);
                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
                block_signals = true;
        }

        if (block_signals) {
                if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
                        return log_full_errno(prio, errno, "Failed to set signal mask: %m");

                /* From here on the cleanup handler restores the original mask whenever we return */
                saved_ssp = &saved_ss;
        }

        if (flags & FORK_NEW_MOUNTNS)
                pid = raw_clone(SIGCHLD|CLONE_NEWNS);
        else
                pid = fork();
        if (pid < 0)
                return log_full_errno(prio, errno, "Failed to fork: %m");
        if (pid > 0) {
                /* We are in the parent process */

                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);

                if (flags & FORK_WAIT) {
                        if (block_all) {
                                /* undo everything except SIGCHLD */
                                ss = saved_ss;
                                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
                                (void) sigprocmask(SIG_SETMASK, &ss, NULL);
                        }

                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
                        if (r < 0)
                                return r;
                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
                                return -EPROTO;
                }

                if (ret_pid)
                        *ret_pid = pid;

                return 1;
        }

        /* We are in the child process */

        /* Restore signal mask manually, i.e. disarm the cleanup handler here; the mask is dealt with
         * explicitly further down. */
        saved_ssp = NULL;

        if (flags & FORK_REOPEN_LOG) {
                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
                log_close();
                log_set_open_when_needed(true);
        }

        if (name) {
                r = rename_process(name);
                if (r < 0)
                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
                                       r, "Failed to rename process, ignoring: %m");
        }

        if (deathsig != 0)
                if (prctl(PR_SET_PDEATHSIG, deathsig) < 0) {
                        log_full_errno(prio, errno, "Failed to set death signal: %m");
                        _exit(EXIT_FAILURE);
                }

        if (flags & FORK_RESET_SIGNALS) {
                r = reset_all_signal_handlers();
                if (r < 0) {
                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
                        _exit(EXIT_FAILURE);
                }

                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
                r = reset_signal_mask();
                if (r < 0) {
                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
                        _exit(EXIT_FAILURE);
                }
        } else if (block_signals) { /* undo what we did above */
                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
                        _exit(EXIT_FAILURE);
                }
        }

        if (deathsig != 0) {
                pid_t ppid;
                /* Let's see if the parent PID is still the one we started from? If not, then the parent
                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect by
                 * raising the very signal we asked the kernel to send us. */

                ppid = getppid();
                if (ppid == 0)
                        /* Parent is in a different PID namespace. */;
                else if (ppid != original_pid) {
                        log_debug("Parent died early, raising %s.", deathsig == SIGINT ? "SIGINT" : "SIGTERM");
                        (void) raise(deathsig);
                        _exit(EXIT_FAILURE);
                }
        }

        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {

                /* Optionally, make sure we never propagate mounts to the host. */

                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
                        _exit(EXIT_FAILURE);
                }
        }

        if (flags & FORK_CLOSE_ALL_FDS) {
                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
                log_close();

                r = close_all_fds(except_fds, n_except_fds);
                if (r < 0) {
                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
                        _exit(EXIT_FAILURE);
                }
        }

        /* When we were asked to reopen the logs, do so again now */
        if (flags & FORK_REOPEN_LOG) {
                log_open();
                log_set_open_when_needed(false);
        }

        if (flags & FORK_NULL_STDIO) {
                r = make_null_stdio();
                if (r < 0) {
                        log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
                        _exit(EXIT_FAILURE);
                }

        } else if (flags & FORK_STDOUT_TO_STDERR) {
                if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
                        log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
                        _exit(EXIT_FAILURE);
                }
        }

        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
                r = rlimit_nofile_safe();
                if (r < 0) {
                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
                        _exit(EXIT_FAILURE);
                }
        }

        if (ret_pid)
                *ret_pid = getpid_cached();

        return 0;
}

int namespace_fork(
                const char *outer_name,
                const char *inner_name,
                const int except_fds[],
                size_t n_except_fds,
                ForkFlags flags,
                int pidns_fd,
                int mntns_fd,
                int netns_fd,
                int userns_fd,
                int root_fd,
                pid_t *ret_pid) {

        const int log_level = FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG;
        ForkFlags outer_flags, inner_flags;
        pid_t inner_pid;
        int r;

        /* Like safe_fork(), but with two forks: the intermediate process joins the specified namespaces
         * and then forks again. That way the innermost process is a full member of the target namespaces,
         * pidns included, so that /proc/self/fd works correctly.
         *
         * Returns 1 in the original process (with the PID of the intermediate process in *ret_pid), 0 in
         * the innermost process (with its own PID in *ret_pid), and a negative error if the first fork
         * failed. The intermediate process never returns from here: it waits for the innermost process and
         * exits with its exit status, or with EXIT_FAILURE if anything went wrong. */

        /* The outer fork always gets a death signal; reopening the log and the mount namespace handling
         * are masked out of it. */
        outer_flags = (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE);

        /* For the inner fork, mask the flags that either make no sense for the grandchild or were already
         * taken care of by the outer fork. */
        inner_flags = flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO);

        r = safe_fork_full(outer_name, except_fds, n_except_fds, outer_flags, ret_pid);
        if (r < 0)
                return r;
        if (r > 0)
                return 1; /* Original process */

        /* Intermediate process */

        r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
        if (r < 0) {
                log_full_errno(log_level, r, "Failed to join namespace: %m");
                _exit(EXIT_FAILURE);
        }

        r = safe_fork_full(inner_name, except_fds, n_except_fds, inner_flags, &inner_pid);
        if (r < 0)
                _exit(EXIT_FAILURE);
        if (r == 0) {
                /* Innermost process */
                if (ret_pid)
                        *ret_pid = inner_pid;
                return 0;
        }

        /* Still the intermediate process: relay the innermost process' exit status */
        r = wait_for_terminate_and_check(inner_name, inner_pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
        if (r < 0)
                _exit(EXIT_FAILURE);

        _exit(r);
}

int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
        bool out_tty, err_tty;
        size_t n_args = 0, k;
        char **args;
        va_list ap;
        int r;

        assert(path);

        /* Spawns a temporary TTY agent that goes away when we go away: forks via safe_fork_full() and
         * executes 'path' in the child. The variadic arguments form the NULL-terminated argument vector
         * passed to execv().
         *
         * In the parent this returns 0 on success (the child's PID is in *ret_pid), or a negative error if
         * forking failed. The child never returns: it either becomes the agent or exits with
         * EXIT_FAILURE. */

        r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        /* In the child: */

        out_tty = isatty(STDOUT_FILENO);
        err_tty = isatty(STDERR_FILENO);

        if (!(out_tty && err_tty)) {
                int tty_fd;

                /* Detach from whichever of stdout/stderr is not a TTY and point it at /dev/tty instead.
                 * This matters when our caller was started via popen() or a similar call that expects to
                 * read EOF: we must not delay that EOF indefinitely by holding on to an unused copy of the
                 * inherited stdout/stderr. */
                tty_fd = open("/dev/tty", O_WRONLY);
                if (tty_fd < 0) {
                        log_error_errno(errno, "Failed to open /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                if (!out_tty && dup2(tty_fd, STDOUT_FILENO) < 0) {
                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                if (!err_tty && dup2(tty_fd, STDERR_FILENO) < 0) {
                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
                        _exit(EXIT_FAILURE);
                }

                safe_close_above_stdio(tty_fd);
        }

        (void) rlimit_nofile_safe();

        /* First pass over the varargs: count the entries up to (not including) the terminating NULL */
        va_start(ap, path);
        while (va_arg(ap, char*))
                n_args++;
        va_end(ap);

        /* Stack-allocate the vector, with room for the terminating NULL */
        args = newa(char*, n_args + 1);

        /* Second pass: copy the entries, including the terminating NULL */
        va_start(ap, path);
        for (k = 0; k < n_args + 1; k++)
                args[k] = va_arg(ap, char*);
        va_end(ap);

        execv(path, args);
        _exit(EXIT_FAILURE);
}

int set_oom_score_adjust(int value) {
        char buf[DECIMAL_STR_MAX(int)];

        /* Writes 'value', formatted as a decimal string, to /proc/self/oom_score_adj and returns the
         * result of write_string_file(). */

        snprintf(buf, sizeof(buf), "%i", value);

        return write_string_file(
                        "/proc/self/oom_score_adj",
                        buf,
                        WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
}

int pidfd_get_pid(int fd, pid_t *ret) {
        char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
        _cleanup_free_ char *fdinfo = NULL;
        char *p;
        int r;

        if (fd < 0)
                return -EBADF;

        xsprintf(path, "/proc/self/fdinfo/%i", fd);

        r = read_full_file(path, &fdinfo, NULL);
        if (r == -ENOENT) /* if fdinfo doesn't exist we assume the process does not exist */
                return -ESRCH;
        if (r < 0)
                return r;

        p = startswith(fdinfo, "Pid:");
        if (!p) {
                p = strstr(fdinfo, "\nPid:");
                if (!p)
                        return -ENOTTY; /* not a pidfd? */

                p += 5;
        }

        p += strspn(p, WHITESPACE);
        p[strcspn(p, WHITESPACE)] = 0;

        return parse_pid(p, ret);
}

/* Converts an RLIMIT_NICE resource limit value into the corresponding nice level, i.e. PRIO_MAX minus the
 * limit, clamped to the range PRIO_MIN … PRIO_MAX-1. */
static int rlimit_to_nice(rlim_t limit) {
        /* Inside the range the mapping is linear */
        if (limit > 1 && limit < -PRIO_MIN + PRIO_MAX)
                return PRIO_MAX - (int) limit;

        /* Outside of it, clamp: to 19 at the low end, to -20 at the high end */
        return limit <= 1 ? PRIO_MAX - 1 : PRIO_MIN;
}

int setpriority_closest(int priority) {
        struct rlimit rl;
        int now, best, denied;

        /* Sets our nice level to 'priority', or, if we lack the privileges for that, to the closest level
         * RLIMIT_NICE permits.
         *
         * Returns 1 if the requested level is in effect, 0 if we had to settle for something else (either
         * the level the resource limit allows, or no change at all), and a negative errno on failure. */

        /* First, simply try the requested level */
        if (setpriority(PRIO_PROCESS, 0, priority) >= 0)
                return 1;

        /* Anything but a permission problem is propagated right away */
        denied = -errno;
        if (!ERRNO_IS_PRIVILEGE(denied))
                return denied;

        /* getpriority() may legitimately return negative values, hence errors are detected via errno */
        errno = 0;
        now = getpriority(PRIO_PROCESS, 0);
        if (errno != 0)
                return -errno;

        if (now == priority)
                return 1;

        /* Hmm, becoming nicer than we currently are should always be permitted. If even that is refused,
         * then the whole setpriority() system call is blocked to us, hence propagate the original
         * error. */
        if (now < priority)
                return denied;

        if (getrlimit(RLIMIT_NICE, &rl) < 0)
                return -errno;

        best = rlimit_to_nice(rl.rlim_cur);

        /* We are already less nice than the limit would allow us to become */
        if (best > now) {
                log_debug("Cannot raise nice level, permissions and the resource limit do not allow it.");
                return 0;
        }

        /* Go as far as the limit permits */
        if (setpriority(PRIO_PROCESS, 0, best) < 0)
                return -errno;

        log_debug("Cannot set requested nice level (%i), used next best (%i).", priority, best);
        return 0;
}

/* Names of the I/O scheduling classes, indexed by the IOPRIO_CLASS_xyz constants */
static const char *const ioprio_class_table[] = {
        [IOPRIO_CLASS_NONE] = "none",
        [IOPRIO_CLASS_RT] = "realtime",
        [IOPRIO_CLASS_BE] = "best-effort",
        [IOPRIO_CLASS_IDLE] = "idle",
};

/* Instantiates the string lookup helpers for the table above. NOTE(review): the "with fallback" variant
 * presumably also handles values that have no name in the table, bounded by IOPRIO_N_CLASSES — confirm
 * against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES);

/* Names of the SIGCHLD si_code values, indexed by the CLD_xyz constants */
static const char *const sigchld_code_table[] = {
        [CLD_EXITED] = "exited",
        [CLD_KILLED] = "killed",
        [CLD_DUMPED] = "dumped",
        [CLD_TRAPPED] = "trapped",
        [CLD_STOPPED] = "stopped",
        [CLD_CONTINUED] = "continued",
};

/* Instantiates the string lookup helpers for the table above (plain variant, no fallback) */
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);

/* Names of the CPU scheduling policies, indexed by the SCHED_xyz constants */
static const char* const sched_policy_table[] = {
        [SCHED_OTHER] = "other",
        [SCHED_BATCH] = "batch",
        [SCHED_IDLE] = "idle",
        [SCHED_FIFO] = "fifo",
        [SCHED_RR] = "rr",
};

/* Instantiates the string lookup helpers for the table above. NOTE(review): the "with fallback" variant
 * presumably also handles policies that have no name in the table, bounded by INT_MAX — confirm against
 * the macro definition. */
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
-												license: LGPL-2.1+ -> LGPL-2.1-or-later

											
										
										
											2020-11-09 05:23:58 +01:00
+								/* SPDX-License-Identifier: LGPL-2.1-or-later */
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												util: split out escaping code into escape.[ch]

This really deserves its own file, given how much code this is now.

											
										
										
											2015-10-23 18:52:53 +02:00
+								#include <ctype.h>
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								#include <errno.h>
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include <limits.h>
 								#include <linux/oom.h>
-												util: split out escaping code into escape.[ch]

This really deserves its own file, given how much code this is now.

											
										
										
											2015-10-23 18:52:53 +02:00
+								#include <stdbool.h>
 								#include <stdio.h>
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include <stdlib.h>
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								#include <sys/mman.h>
-												process-util: add a new FORK_MOUNTNS_SLAVE flag for safe_fork()

We already have a flag for creating a new mount namespace for the child.
Let's add an extension to that: a new FORK_MOUNTNS_SLAVE flag. When
used in combination will mark all mounts in the child namespace as
MS_SLAVE so that the child can freely mount or unmount stuff but it
won't leak into the parent.

											
										
										
											2018-03-23 20:52:46 +01:00
+								#include <sys/mount.h>
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								#include <sys/personality.h>
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
+								#include <sys/prctl.h>
-												util: split out escaping code into escape.[ch]

This really deserves its own file, given how much code this is now.

											
										
										
											2015-10-23 18:52:53 +02:00
+								#include <sys/types.h>
 								#include <sys/wait.h>
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include <syslog.h>
-												util: split out escaping code into escape.[ch]

This really deserves its own file, given how much code this is now.

											
										
										
											2015-10-23 18:52:53 +02:00
+								#include <unistd.h>
-												build-sys: use #if Y instead of #ifdef Y everywhere

The advantage is that if the name is misspelt, cpp will warn us.

$ git grep -Ee "conf.set\('(HAVE|ENABLE)_" -l|xargs sed -r -i "s/conf.set\('(HAVE|ENABLE)_/conf.set10('\1_/"
$ git grep -Ee '#ifn?def (HAVE|ENABLE)' -l|xargs sed -r -i 's/#ifdef (HAVE|ENABLE)/#if \1/; s/#ifndef (HAVE|ENABLE)/#if ! \1/;'
$ git grep -Ee 'if.*defined\(HAVE' -l|xargs sed -i -r 's/defined\((HAVE_[A-Z0-9_]*)\)/\1/g'
$ git grep -Ee 'if.*defined\(ENABLE' -l|xargs sed -i -r 's/defined\((ENABLE_[A-Z0-9_]*)\)/\1/g'
+ manual changes to meson.build

squash! build-sys: use #if Y instead of #ifdef Y everywhere

v2:
- fix incorrect setting of HAVE_LIBIDN2

											
										
										
											2017-10-03 10:41:51 +02:00
+								#if HAVE_VALGRIND_VALGRIND_H
-												core: add valgrind helper for daemon-reexec

Inspired by https://github.com/systemd/systemd/issues/2187#issuecomment-165587140

											
										
										
											2016-01-19 16:48:45 +01:00
+								#include <valgrind/valgrind.h>
 								#endif
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												util-lib: split out allocation calls into alloc-util.[ch]

											
										
										
											2015-10-27 03:01:06 +01:00
+								#include "alloc-util.h"
-												util-lib: use the architecture ids from architecture.h for personalities

We have this ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								#include "architecture.h"
-												Introduce sc_arg_max() helper

Just a cast and an assert.

											
										
										
											2019-05-11 09:51:33 +02:00
+								#include "env-util.h"
-												core: in execute, Never fail setting Nice priority

Instead, push to the closest possible Nice priority setting.

Replaces: #11397

											
										
										
											2017-08-01 18:38:05 +02:00
+								#include "errno-util.h"
-												process-util: tweak get_process_cwd() when calling for own process

Let's bypass /proc if we can.

											
										
										
											2019-11-25 14:55:50 +01:00
+								#include "escape.h"
-												util-lib: split out fd-related operations into fd-util.[ch]

There are more than enough to deserve their own .c file, hence move them
over.

											
										
										
											2015-10-25 13:14:12 +01:00
+								#include "fd-util.h"
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								#include "fileio.h"
-												util-lib: move a number of fs operations into fs-util.[ch]

											
										
										
											2015-10-26 21:16:26 +01:00
+								#include "fs-util.h"
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								#include "ioprio.h"
-												Add 8bit-version of get_process_cmdline() and use in cgroup-show.c

This restores show_pid_array() output in legacy locales on the console.
Only one call to get_process_cmdline() is changed, all others retain
utf8-only mode. This affects systemd-cgls, systemctl status, etc, when
working locally.

Calls to get_process_cmdline() that cross a process boundary always use
utf8. It's the callers responsibility to convert this to some encoding that
they use. This means that we always pass utf8 over the bus.

											
										
										
											2019-05-16 17:44:57 +02:00
+								#include "locale-util.h"
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								#include "log.h"
-												basic: include only what we use

This is a cleaned up result of running iwyu but without forward
declarations on src/basic.

											
										
										
											2015-11-30 21:43:37 +01:00
+								#include "macro.h"
-												util: split out memcmp()/memset() related calls into memory-util.[ch]

Just some source rearranging.

											
										
										
											2019-03-13 12:02:21 +01:00
+								#include "memory-util.h"
-												tree-wide: drop missing.h

											
										
										
											2019-10-31 03:07:23 +01:00
+								#include "missing_sched.h"
 								#include "missing_syscall.h"
-												util: split out namespace related stuff into a new namespace-util.[ch] pair

Just some minor reorganization.

											
										
										
											2019-03-13 11:21:49 +01:00
+								#include "namespace-util.h"
-												process-util: tweak get_process_cwd() when calling for own process

Let's bypass /proc if we can.

											
										
										
											2019-11-25 14:55:50 +01:00
+								#include "path-util.h"
-												basic: re-sort includes

My previous patch to only include what we use accidentally placed
the added includes in non-sorted order.

											
										
										
											2015-12-01 23:22:03 +01:00
+								#include "process-util.h"
-												util-lib: Add sparc64 support for process creation (#3348)

The current raw_clone function takes two arguments, the cloning flags and
a pointer to the stack for the cloned child. The raw cloning without
passing a "thread main" function does not make sense if a new stack is
specified, as it returns in both the parent and the child, which will fail
in the child as the stack is virgin. All uses of raw_clone indeed pass NULL
for the stack pointer which indicates that both processes should share the
stack address (so you better don't pass CLONE_VM).

This commit refactors the code to not require the caller to pass the stack
address, as NULL is the only sensible option. It also adds the magic code
needed to make raw_clone work on sparc64, which does not return 0 in %o0
for the child, but indicates the child process by setting %o1 to non-zero.
This refactoring is not plain aesthetic, because non-NULL stack addresses
need to get mangled before being passed to the clone syscall (you have to
apply STACK_BIAS), whereas NULL must not be mangled. Implementing the
conditional mangling of the stack address would needlessly complicate the
code.

raw_clone is moved to a separate header, because the burden of including
the assert machinery and sched.h shouldn't be applied to every user of
missing_syscalls.h
											
										
										
											2016-05-30 02:03:51 +02:00
+								#include "raw-clone.h"
-												process-util: add new FORK_RLIMIT_NOFILE_SAFE flag for safe_fork()

The new flag simply means rlimit_nofile_safe() is called in the child
after all fds are rearranged.

											
										
										
											2018-11-26 15:59:17 +01:00
+								#include "rlimit-util.h"
-												basic: re-sort includes

My previous patch to only include what we use accidentally placed
the added includes in non-sorted order.

											
										
										
											2015-12-01 23:22:03 +01:00
+								#include "signal-util.h"
-												shutdown: complain if process excluded from killing spree runs of the same rootfs as PID1

											
										
										
											2016-01-24 16:08:36 +01:00
+								#include "stat-util.h"
-												process-util: add helper pidfd_get_pid()

It returns the pid_t a pidfd refers to.

											
										
										
											2019-10-30 16:35:48 +01:00
+								#include "stdio-util.h"
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								#include "string-table.h"
-												util-lib: split our string related calls from util.[ch] into its own file string-util.[ch]

There are more than enough calls doing string manipulations to deserve
its own files, hence do something about it.

This patch also sorts the #include blocks of all files that needed to be
updated, according to the sorting suggestions from CODING_STYLE. Since
pretty much every file needs our string manipulation functions this
effectively means that most files have sorted #include blocks now.

Also touches a few unrelated include files.

											
										
										
											2015-10-24 22:58:24 +02:00
+								#include "string-util.h"
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								#include "terminal-util.h"
-												util-lib: split out user/group/uid/gid calls into user-util.[ch]

											
										
										
											2015-10-25 22:32:30 +01:00
+								#include "user-util.h"
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								#include "utf8.h"
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												util-lib: do not truncate kernel comm names

It turns out that the kernel allows comm names longer than our expected limit
of 16.
$ wc -c /proc/*/comm|sort -g|tail -n3
35 /proc/1292317/comm
35 /proc/1293610/comm
36 /proc/1287112/comm
$ cat /proc/1287112/comm
kworker/u9:3-kcryptd/253:0

											
										
										
											2019-05-15 11:55:59 +02:00
+								/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
 								 * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
 								 */
 								#define COMM_MAX_LEN 128
-												small fixes: make get_process_state() static and fix typo

											
										
										
											2019-05-20 13:37:03 +02:00
+								static int get_process_state(pid_t pid) {
-												process-util: shortcut get_process_state() for our own process

											
										
										
											2019-11-25 14:59:01 +01:00
+								        _cleanup_free_ char *line = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        const char *p;
 								        char state;
 								        int r;
 								        assert(pid >= 0);
-												process-util: shortcut get_process_state() for our own process

											
										
										
											2019-11-25 14:59:01 +01:00
+								        /* Shortcut: if we are enquired about our own state, we are obviously running */
 								        if (pid == 0 || pid == getpid_cached())
 								                return (unsigned char) 'R';
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        p = procfs_file_alloca(pid, "stat");
-												process: return ESRCH when a PID is not valid anymore

so far, when we read something from /proc/$PID we would pass on the
ENOENT from the kernel as error, if the process was missing. With this
change we systematically convert this to ESRCH, which is the more
appropriate error code, and what all the other glibc/syscalls like
kill() use.

All code that calls these functions should be fine with this change. In
fact, one invocation of get_process_exe() in bus-creds.c already assumed
ESRCH would be returned if a process is missing, and this assumption is
now validated after the change.

											
										
										
											2015-07-23 23:44:40 +02:00
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        r = read_one_line_file(p, &line);
-												process: return ESRCH when a PID is not valid anymore

so far, when we read something from /proc/$PID we would pass on the
ENOENT from the kernel as error, if the process was missing. With this
change we systematically convert this to ESRCH, which is the more
appropriate error code, and what all the other glibc/syscalls like
kill() use.

All code that calls these functions should be fine with this change. In
fact, one invocation of get_process_exe() in bus-creds.c already assumed
ESRCH would be returned if a process is missing, and this assumption is
now validated after the change.

											
										
										
											2015-07-23 23:44:40 +02:00
+								        if (r == -ENOENT)
 								                return -ESRCH;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        if (r < 0)
 								                return r;
 								        p = strrchr(line, ')');
 								        if (!p)
 								                return -EIO;
 								        p++;
 								        if (sscanf(p, " %c", &state) != 1)
 								                return -EIO;
 								        return (unsigned char) state;
 								}
-												process-util: also filter non-printable characters in get_process_com()

We already do that in get_process_cmdline(), which is very similar in
behaviour otherwise. Hence, let's be safe and also filter them in
get_process_comm(). Let's try to retain as much information as we can
though and escape rather than suppress unprintable characters. Let's not
increase comm names beyond the kernel limit on such names however.

Also see discussion about this here:

https://marc.info/?l=linux-api&m=152649570404881&w=2

											
										
										
											2018-05-17 03:50:35 +02:00
+								int get_process_comm(pid_t pid, char **ret) {
 								        _cleanup_free_ char *escaped = NULL, *comm = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        int r;
-												process-util: also filter non-printable characters in get_process_com()

We already do that in get_process_cmdline(), which is very similar in
behaviour otherwise. Hence, let's be safe and also filter them in
get_process_comm(). Let's try to retain as much information as we can
though and escape rather than suppress unprintable characters. Let's not
increase comm names beyond the kernel limit on such names however.

Also see discussion about this here:

https://marc.info/?l=linux-api&m=152649570404881&w=2

											
										
										
											2018-05-17 03:50:35 +02:00
+								        assert(ret);
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        assert(pid >= 0);
-												process-util: shortcut get_process_comm() for our own process

Let's bypass /proc if we can.

											
										
										
											2019-11-25 14:58:24 +01:00
+								        if (pid == 0 || pid == getpid_cached()) {
 								                comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
 								                if (!comm)
 								                        return -ENOMEM;
 								                if (prctl(PR_GET_NAME, comm) < 0)
 								                        return -errno;
 								        } else {
 								                const char *p;
 								                p = procfs_file_alloca(pid, "comm");
 								                /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
 								                r = read_one_line_file(p, &comm);
 								                if (r == -ENOENT)
 								                        return -ESRCH;
 								                if (r < 0)
 								                        return r;
 								        }
-												util-lib: do not truncate kernel comm names

It turns out that the kernel allows comm names longer than our expected limit
of 16.
$ wc -c /proc/*/comm|sort -g|tail -n3
35 /proc/1292317/comm
35 /proc/1293610/comm
36 /proc/1287112/comm
$ cat /proc/1287112/comm
kworker/u9:3-kcryptd/253:0

											
										
										
											2019-05-15 11:55:59 +02:00
+								        escaped = new(char, COMM_MAX_LEN);
-												process-util: also filter non-printable characters in get_process_com()

We already do that in get_process_cmdline(), which is very similar in
behaviour otherwise. Hence, let's be safe and also filter them in
get_process_comm(). Let's try to retain as much information as we can
though and escape rather than suppress unprintable characters. Let's not
increase comm names beyond the kernel limit on such names however.

Also see discussion about this here:

https://marc.info/?l=linux-api&m=152649570404881&w=2

											
										
										
											2018-05-17 03:50:35 +02:00
+								        if (!escaped)
 								                return -ENOMEM;
 								        /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
-												util-lib: do not truncate kernel comm names

It turns out that the kernel allows comm names longer than our expected limit
of 16.
$ wc -c /proc/*/comm|sort -g|tail -n3
35 /proc/1292317/comm
35 /proc/1293610/comm
36 /proc/1287112/comm
$ cat /proc/1287112/comm
kworker/u9:3-kcryptd/253:0

											
										
										
											2019-05-15 11:55:59 +02:00
+								        cellescape(escaped, COMM_MAX_LEN, comm);
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: also filter non-printable characters in get_process_com()

We already do that in get_process_cmdline(), which is very similar in
behaviour otherwise. Hence, let's be safe and also filter them in
get_process_comm(). Let's try to retain as much information as we can
though and escape rather than suppress unprintable characters. Let's not
increase comm names beyond the kernel limit on such names however.

Also see discussion about this here:

https://marc.info/?l=linux-api&m=152649570404881&w=2

											
										
										
											2018-05-17 03:50:35 +02:00
+								        *ret = TAKE_PTR(escaped);
 								        return 0;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								}
-												basic/process-util: convert bool arg to flags

In preparation for the next commit…

											
										
										
											2019-05-16 12:14:52 +02:00
+								int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **line) {
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        _cleanup_fclose_ FILE *f = NULL;
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        _cleanup_free_ char *t = NULL, *ans = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        const char *p;
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        int r;
 								        size_t k;
 								        /* This is supposed to be a safety guard against runaway command lines. */
 								        size_t max_length = sc_arg_max();
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        assert(line);
 								        assert(pid >= 0);
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        /* Retrieves a process' command line. Replaces non-utf8 bytes by replacement character (<28>). If
 								         * max_columns is != -1 will return a string of the specified console width at most, abbreviated with
-												basic/process-util: convert bool arg to flags

In preparation for the next commit…

											
										
										
											2019-05-16 12:14:52 +02:00
+								         * an ellipsis. If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command
 								         * line set (the case for kernel threads), or has a command line that resolves to the empty string
 								         * will return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of
 								         * input data.
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
+								         *
 								         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
-												process-util: rename char *r to ans and add comment

Add a comment about the return value and rename r to ans. r is
nowadays reserved for the integer return value, and char *r is confusing.

											
										
										
											2017-01-15 18:41:34 +01:00
+								         * comm_fallback is false). Returns 0 and sets *line otherwise. */
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        p = procfs_file_alloca(pid, "cmdline");
-												Add fopen_unlocked() wrapper

											
										
										
											2019-04-04 10:17:16 +02:00
+								        r = fopen_unlocked(p, "re", &f);
 								        if (r == -ENOENT)
 								                return -ESRCH;
 								        if (r < 0)
 								                return r;
-												basic: turn off stdio locking for a couple of helper calls

These helper calls are potentially called often, and allocate FILE*
objects internally for a very short period of time, let's turn off
locking for them too.

											
										
										
											2017-12-11 20:01:55 +01:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        /* We assume that each four-byte character uses one or two columns. If we ever check for combining
 								         * characters, this assumption will need to be adjusted. */
 								        if ((size_t) 4 * max_columns + 1 < max_columns)
 								                max_length = MIN(max_length, (size_t) 4 * max_columns + 1);
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        t = new(char, max_length);
 								        if (!t)
 								                return -ENOMEM;
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        k = fread(t, 1, max_length, f);
 								        if (k > 0) {
 								                /* Arguments are separated by NULs. Let's replace those with spaces. */
 								                for (size_t i = 0; i < k - 1; i++)
 								                        if (t[i] == '\0')
 								                                t[i] = ' ';
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								                t[k] = '\0'; /* Normally, t[k] is already NUL, so this is just a guard in case of short read */
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        } else {
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								                /* We only treat getting nothing as an error. We *could* also get an error after reading some
 								                 * data, but we ignore that case, as such an error is rather unlikely and we prefer to get
 								                 * some data rather than none. */
 								                if (ferror(f))
 								                        return -errno;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												basic/process-util: convert bool arg to flags

In preparation for the next commit…

											
										
										
											2019-05-16 12:14:52 +02:00
+								                if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                        return -ENOENT;
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								                /* Kernel threads have no argv[] */
 								                _cleanup_free_ char *t2 = NULL;
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace are removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace are suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								                r = get_process_comm(pid, &t2);
 								                if (r < 0)
 								                        return r;
-												util-lib: rework get_process_cmdline() (#3529)

This reworks get_process_cmdline() quite substantially, fixing the following:

- Fixes:
  https://github.com/systemd/systemd/pull/3512/commits/a4e3bf4d7ac2de51191ce136ee9361ba319e106c#r66837630

- The passed max_length is also applied to the "comm" name, if comm_fallback is
  set.

- The right thing happens if max_length == 1 is specified

- when the cmdline "foobar" is abbreviated to 6 characters the result is now
  "foobar" instead of "foo...".

- trailing whitespace is removed before the ... suffix is appended. The 7
  character abbreviation of "foo barz" is hence "foo..." instead of "foo ...".

- leading whitespace is suppressed from the cmdline

- a comprehensive test case is added
											
										
										
											2016-06-14 23:52:29 +02:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								                mfree(t);
 								                t = strjoin("[", t2, "]");
 								                if (!t)
 								                        return -ENOMEM;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        }
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        delete_trailing_chars(t, WHITESPACE);
-												process-util: don't use overly large buffer to store process command line

Allocate new string as a return value and free our "scratch pad"
buffer that is potentially much larger than needed (up to
_SC_ARG_MAX).

Fixes #11502

											
										
										
											2019-01-22 14:29:50 +01:00
-												Add 8bit-version of get_process_cmdline() and use in cgroup-show.c

This restores show_pid_array() output in legacy locales on the console.
Only one call to get_process_cmdline() is changed, all others retain
utf8-only mode. This affects systemd-cgls, systemctl status, etc, when
working locally.

Calls to get_process_cmdline() that cross a process boundary always use
utf8. It's the caller's responsibility to convert this to some encoding that
they use. This means that we always pass utf8 over the bus.

											
										
										
											2019-05-16 17:44:57 +02:00
+								        bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();
 								        ans = escape_non_printable_full(t, max_columns, eight_bit);
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        if (!ans)
 								                return -ENOMEM;
-												process-util: don't use overly large buffer to store process command line

Allocate new string as a return value and free our "scratch pad"
buffer that is potentially much larger than needed (up to
_SC_ARG_MAX).

Fixes #11502

											
										
										
											2019-01-22 14:29:50 +01:00
-												Rework cmdline printing to use unicode

The functions to retrieve and print process cmdlines were based on the
assumption that they contain printable ASCII, and everything else
should be filtered out. That assumption doesn't hold in today's world,
where people are free to use unicode everywhere.

This replaces the custom cmdline reading code with a more generic approach
using utf8_escape_non_printable_full().
For kernel threads, truncation is done on the parenthesized name, so we'll
get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of
available columns.

This implementation is most likely slower for very long cmdlines, but I don't
think this is very important. The common case is to have short commandlines,
and should print those properly. Absurdly long cmdlines are the exception,
which needs to be handled correctly and safely, but speed is not too important.

Fixes #12532.

v2:
- use size_t for the number of columns. This change propagates into various
  other functions that call get_process_cmdline(), increasing the size of the
  patch, but the changes are rather trivial.

											
										
										
											2019-05-15 11:20:26 +02:00
+								        (void) str_realloc(&ans);
 								        *line = TAKE_PTR(ans);
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        return 0;
 								}
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								static int update_argv(const char name[], size_t l) {
-												basic/process-util: only try PR_SET_MM once

userwork wants to update the title many times, and a strace is full of
attempts that fail the same way:

[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0

If we get a permission error, don't try again.

											
										
										
											2020-05-22 09:23:31 +02:00
+								        static int can_do = -1;
 								        if (can_do == 0)
 								                return 0;
 								        can_do = false; /* We'll set it to true only if the whole process works */
-												process-util: update the end pointer of the process name on rename (#6492)

We only updated the end pointer when allocating new memory, i.e. on the first
call to rename_process.

											
										
										
											2017-08-02 17:08:31 +02:00
+								        /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
 								         * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
 								         * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
 								         * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
 								         * mmap() is not. */
 								        if (geteuid() != 0)
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
 								                                       "Skipping PR_SET_MM, as we don't have privileges.");
 								        static size_t mm_size = 0;
 								        static char *mm = NULL;
 								        int r;
 								        if (mm_size < l+1) {
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								                size_t nn_size;
 								                char *nn;
 								                nn_size = PAGE_ALIGN(l+1);
 								                nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                if (nn == MAP_FAILED)
 								                        return log_debug_errno(errno, "mmap() failed: %m");
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								                strncpy(nn, name, nn_size);
 								                /* Now, let's tell the kernel about this new memory */
 								                if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
-												basic/process-util: only try PR_SET_MM once

userwork wants to update the title many times, and a strace is full of
attempts that fail the same way:

[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0

If we get a permission error, don't try again.

											
										
										
											2020-05-22 09:23:31 +02:00
+								                        if (ERRNO_IS_PRIVILEGE(errno))
 								                                return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m");
-												 * hack around deficiencies in prctl() PR_SET_MM_*

											
										
										
											2018-10-04 23:06:16 +02:00
+								                        /* HACK: prctl() API is kind of dumb on this point.  The existing end address may already be
 								                         * below the desired start address, in which case the kernel may have kicked this back due
 								                         * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
 								                         * action).  The proper solution would be to have a prctl() API that could set both start+end
 								                         * simultaneously, or at least let us query the existing address to anticipate this condition
 								                         * and respond accordingly.  For now, we can only guess at the cause of this failure and try
 								                         * a workaround--which will briefly expand the arg space to something potentially huge before
 								                         * resizing it to what we want. */
 								                        log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
 								                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                                r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
-												 * hack around deficiencies in prctl() PR_SET_MM_*

											
										
										
											2018-10-04 23:06:16 +02:00
+								                                (void) munmap(nn, nn_size);
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                                return r;
-												 * hack around deficiencies in prctl() PR_SET_MM_*

											
										
										
											2018-10-04 23:06:16 +02:00
+								                        }
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0)
 								                                return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
-												 * hack around deficiencies in prctl() PR_SET_MM_*

											
										
										
											2018-10-04 23:06:16 +02:00
+								                } else {
 								                        /* And update the end pointer to the new end, too. If this fails, we don't really know what
 								                         * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
 								                         * and continue. */
 								                        if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
 								                                log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 								                }
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
 								                if (mm)
 								                        (void) munmap(mm, mm_size);
 								                mm = nn;
 								                mm_size = nn_size;
-												process-util: update the end pointer of the process name on rename (#6492)

We only updated the end pointer when allocating new memory, i.e. on the first
call to rename_process.

											
										
										
											2017-08-02 17:08:31 +02:00
+								        } else {
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								                strncpy(mm, name, mm_size);
-												process-util: update the end pointer of the process name on rename (#6492)

We only updated the end pointer when allocating new memory, i.e. on the first
call to rename_process.

											
										
										
											2017-08-02 17:08:31 +02:00
+								                /* Update the end pointer, continuing regardless of any failure. */
 								                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
 								                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
 								        }
-												basic/process-util: only try PR_SET_MM once

userwork wants to update the title many times, and a strace is full of
attempts that fail the same way:

[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce32901d, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0
[pid 21765] accept4(3, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK) = -1 EAGAIN (Resource temporarily unavailable)
[pid 21765] prctl(PR_SET_NAME, "systemd-userwor"...) = 0
[pid 21765] geteuid()                   = 0
[pid 21765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fedce329000
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_START, 0x7fedce329000, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] prctl(PR_SET_MM, PR_SET_MM_ARG_END, 0x7fedce329020, 0, 0) = -1 EPERM (Operation not permitted)
[pid 21765] munmap(0x7fedce329000, 4096) = 0

If we get a permission error, don't try again.

											
										
										
											2020-05-22 09:23:31 +02:00
+								        can_do = true;
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								        return 0;
 								}
 								int rename_process(const char name[]) {
 								        bool truncated = false;
 								        /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
 								         * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
 								         * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
 								         * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
 								         * truncated.
 								         *
 								         * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
 								        if (isempty(name))
 								                return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
 								        if (!is_main_thread())
 								                return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
 								                                * cache things without locking, and we make assumptions that PR_SET_NAME sets the
 								                                * process name that isn't correct on any other threads */
 								        size_t l = strlen(name);
 								        /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
 								         * can use PR_SET_NAME, which sets the thread name for the calling thread. */
 								        if (prctl(PR_SET_NAME, name) < 0)
 								                log_debug_errno(errno, "PR_SET_NAME failed: %m");
 								        if (l >= TASK_COMM_LEN) /* Linux userspace process names can be 15 chars at max */
 								                truncated = true;
 								        /* Second step, change glibc's ID of the process name. */
 								        if (program_invocation_name) {
 								                size_t k;
 								                k = strlen(program_invocation_name);
 								                strncpy(program_invocation_name, name, k);
 								                if (l > k)
 								                        truncated = true;
 								        }
 								        /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
 								         * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
 								         * the end. This is the best option for changing /proc/self/cmdline. */
 								        (void) update_argv(name, l);
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								        /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
 								         * it still looks here */
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
+								        if (saved_argc > 0) {
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
+								                if (saved_argv[0]) {
 								                        size_t k;
 								                        k = strlen(saved_argv[0]);
 								                        strncpy(saved_argv[0], name, k);
 								                        if (l > k)
 								                                truncated = true;
 								                }
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
-												basic/process-util: create helper function

No functional change.

											
										
										
											2020-05-22 09:00:49 +02:00
+								                for (int i = 1; i < saved_argc; i++) {
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
+								                        if (!saved_argv[i])
 								                                break;
 								                        memzero(saved_argv[i], strlen(saved_argv[i]));
 								                }
 								        }
-												util-lib: rework rename_process() to be able to make use of PR_SET_MM_ARG_START

PR_SET_MM_ARG_START allows us to relatively cleanly implement process renaming.
However, it's only available with privileges. Hence, let's try to make use of
it, and if we can't fall back to the traditional way of overriding argv[0].

This removes size restrictions on the process name shown in argv[] at least for
privileged processes.

											
										
										
											2016-12-06 20:29:07 +01:00
 								        return !truncated;
-												process-util: actually move rename_process() over

The prototype was moved long ago, actually move the definition over now,
too.

											
										
										
											2015-10-27 13:56:40 +01:00
+								}
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								/* Determines whether the specified PID refers to a kernel thread, by reading the process' "stat" file
 								 * from procfs and testing the PF_KTHREAD bit in its flags field.
 								 *
 								 * Returns 1 if PF_KTHREAD is set and 0 if it is not. PID 0, PID 1 and the calling process are reported
 								 * as 0 without consulting procfs. Returns -EINVAL if the PID is not valid or the line cannot be
 								 * parsed, -ESRCH if reading the file fails with -ENOENT, and otherwise the negative error returned by
 								 * read_one_line_file() or safe_atollu(). */
 								int is_kernel_thread(pid_t pid) {
 								        _cleanup_free_ char *stat_line = NULL;
 								        unsigned long long task_flags;
 								        const char *path;
 								        char *cursor;
 								        int r;
 								        /* pid 1, and we ourselves certainly aren't a kernel thread */
 								        if (IN_SET(pid, 0, 1) || pid == getpid_cached())
 								                return 0;
 								        if (!pid_is_valid(pid))
 								                return -EINVAL;
 								        path = procfs_file_alloca(pid, "stat");
 								        r = read_one_line_file(path, &stat_line);
 								        if (r < 0)
 								                return r == -ENOENT ? -ESRCH : r;
 								        /* Skip past the comm field. We look for the last ')' in the line, since the comm value is
 								         * enclosed in () but may itself contain parentheses. */
 								        cursor = strrchr(stat_line, ')');
 								        if (!cursor)
 								                return -EINVAL;
 								        cursor++;
 								        /* Walk over seven whitespace-separated fields: the first six are stepped over, the seventh is
 								         * the flags field, which is NUL-terminated in place. Every field must be preceded by at least
 								         * one whitespace character and must not be empty. */
 								        for (unsigned n_field = 0;; n_field++) {
 								                size_t n;
 								                n = strspn(cursor, WHITESPACE);
 								                if (n == 0)
 								                        return -EINVAL;
 								                cursor += n;
 								                n = strcspn(cursor, WHITESPACE);
 								                if (n == 0)
 								                        return -EINVAL;
 								                if (n_field == 6) {
 								                        cursor[n] = 0; /* Truncate the rest */
 								                        break;
 								                }
 								                cursor += n;
 								        }
 								        r = safe_atollu(cursor, &task_flags);
 								        if (r < 0)
 								                return r;
 								        return (task_flags & PF_KTHREAD) ? 1 : 0;
 								}
 								int get_process_capeff(pid_t pid, char **capeff) {
 								        const char *p;
-												process: return ESRCH when a PID is not valid anymore

so far, when we read something from /proc/$PID we would pass on the
ENOENT from the kernel as error, if the process was missing. With this
change we systematically convert this to ESRCH, which is the more
appropriate error code, and what all the other glibc/syscalls like
kill() use.

All code that calls these functions should be fine with this change. In
fact, one invocation of get_process_exe() in bus-creds.c already assumed
ESRCH would be returned if a process is missing, and this assumption is
now validated after the change.

											
										
										
											2015-07-23 23:44:40 +02:00
+								        int r;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        assert(capeff);
 								        assert(pid >= 0);
 								        p = procfs_file_alloca(pid, "status");
-												fileio: make get_status_field() more generic

All users of get_status_field() expect the field pattern to occur in
the beginning of a line, and the delimiter is ':'.

Hardcode this into the function, and also skip any whitespace before ':'
to support fields in files like /proc/cpuinfo. Add support for returning
the full field value (currently stops on first whitespace).

Rename the function so it's easier to ensure all callers switch to new
semantics.

											
										
										
											2015-09-30 14:57:55 +02:00
+								        r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
-												process: return ESRCH when a PID is not valid anymore

so far, when we read something from /proc/$PID we would pass on the
ENOENT from the kernel as error, if the process was missing. With this
change we systematically convert this to ESRCH, which is the more
appropriate error code, and what all the other glibc/syscalls like
kill() use.

All code that calls these functions should be fine with this change. In
fact, one invocation of get_process_exe() in bus-creds.c already assumed
ESRCH would be returned if a process is missing, and this assumption is
now validated after the change.

											
										
										
											2015-07-23 23:44:40 +02:00
+								        if (r == -ENOENT)
 								                return -ESRCH;
 								        return r;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								}
 								/* Reads the target of the symlink proc_file via readlink_malloc() and stores it in *name. Returns 0
 								 * on success, -ESRCH if the link does not exist (-ENOENT), and any other negative error unchanged. */
 								static int get_process_link_contents(const char *proc_file, char **name) {
 								        int r;
 								        assert(proc_file);
 								        assert(name);
 								        r = readlink_malloc(proc_file, name);
 								        if (r >= 0)
 								                return 0; /* Positive results are flattened to 0 */
 								        return r == -ENOENT ? -ESRCH : r;
 								}
 								/* Resolves the "exe" procfs symlink of the specified process and stores the result in *name. If the
 								 * link target ends in " (deleted)", that suffix is cut off. Returns 0 on success, or the negative
 								 * error of get_process_link_contents(). */
 								int get_process_exe(pid_t pid, char **name) {
 								        const char *exe_link;
 								        char *suffix;
 								        int r;
 								        assert(pid >= 0);
 								        exe_link = procfs_file_alloca(pid, "exe");
 								        r = get_process_link_contents(exe_link, name);
 								        if (r < 0)
 								                return r;
 								        /* Terminate the string where the suffix begins, if it is there */
 								        suffix = endswith(*name, " (deleted)");
 								        if (suffix)
 								                *suffix = '\0';
 								        return 0;
 								}
 								static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
 								        _cleanup_fclose_ FILE *f = NULL;
 								        const char *p;
-												process-util: FOREACH_LINE excorcism

											
										
										
											2018-10-18 16:08:51 +02:00
+								        int r;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        assert(field);
 								        assert(uid);
-												Revert "tree-wide: use pid_is_valid() at more places"

This reverts commit ee043777be58251e7441b4f04594e9e3792d7fb2.

It broke almost everywhere it touched.  The places that
handn't been converted, were mostly followed by special
handling for the invalid PID `0`.  That explains why they
tested for `pid < 0` instead of `pid <= 0`.

I think that one was the first commit I reviewed, heh.

											
										
										
											2017-10-03 13:05:24 +02:00
+								        if (pid < 0)
-												process-util: slightly optimize querying of our own process metadata

When we are checking our own data, we can optimize things a bit.

											
										
										
											2017-07-17 23:35:25 +02:00
+								                return -EINVAL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        p = procfs_file_alloca(pid, "status");
-												Add fopen_unlocked() wrapper

											
										
										
											2019-04-04 10:17:16 +02:00
+								        r = fopen_unlocked(p, "re", &f);
 								        if (r == -ENOENT)
 								                return -ESRCH;
 								        if (r < 0)
 								                return r;
-												basic: turn off stdio locking for a couple of helper calls

These helper calls are potentially called often, and allocate FILE*
objects internally for a very short period of time, let's turn off
locking for them too.

											
										
										
											2017-12-11 20:01:55 +01:00
-												process-util: FOREACH_LINE excorcism

											
										
										
											2018-10-18 16:08:51 +02:00
+								        for (;;) {
 								                _cleanup_free_ char *line = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                char *l;
-												process-util: FOREACH_LINE excorcism

											
										
										
											2018-10-18 16:08:51 +02:00
+								                r = read_line(f, LONG_LINE_MAX, &line);
 								                if (r < 0)
 								                        return r;
 								                if (r == 0)
 								                        break;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                l = strstrip(line);
 								                if (startswith(l, field)) {
 								                        l += strlen(field);
 								                        l += strspn(l, WHITESPACE);
 								                        l[strcspn(l, WHITESPACE)] = 0;
 								                        return parse_uid(l, uid);
 								                }
 								        }
 								        return -EIO;
 								}
 								/* Stores the UID of the specified process in *uid. For PID 0 and for our own PID this is getuid();
 								 * for any other PID the "Uid:" line is looked up through get_process_id(), whose result is returned. */
 								int get_process_uid(pid_t pid, uid_t *uid) {
 								        /* Some other process: we need to go through procfs */
 								        if (pid != 0 && pid != getpid_cached())
 								                return get_process_id(pid, "Uid:", uid);
 								        /* When we are checking our own data, we can optimize things a bit */
 								        *uid = getuid();
 								        return 0;
 								}
 								/* Stores the GID of the specified process in *gid. For PID 0 and for our own PID this is getgid();
 								 * for any other PID the "Gid:" line is looked up through get_process_id(), whose result is returned. */
 								int get_process_gid(pid_t pid, gid_t *gid) {
 								        /* get_process_id() writes through a uid_t pointer, hence make sure at build time that handing
 								         * it a gid_t pointer is fine size-wise */
 								        assert_cc(sizeof(uid_t) == sizeof(gid_t));
 								        /* Some other process: we need to go through procfs */
 								        if (pid != 0 && pid != getpid_cached())
 								                return get_process_id(pid, "Gid:", gid);
 								        /* When we are checking our own data, we can optimize things a bit */
 								        *gid = getgid();
 								        return 0;
 								}
 								/* Stores the current working directory of the specified process in *cwd. For PID 0 and for our own
 								 * PID the result of safe_getcwd() is returned; for any other PID the "cwd" procfs symlink is resolved
 								 * through get_process_link_contents(). */
 								int get_process_cwd(pid_t pid, char **cwd) {
 								        const char *cwd_link;
 								        assert(pid >= 0);
 								        if (pid != 0 && pid != getpid_cached()) {
 								                cwd_link = procfs_file_alloca(pid, "cwd");
 								                return get_process_link_contents(cwd_link, cwd);
 								        }
 								        /* Let's bypass /proc if we can */
 								        return safe_getcwd(cwd);
 								}
 								/* Resolves the "root" procfs symlink of the specified process and stores the result in *root. Returns
 								 * whatever get_process_link_contents() returns. */
 								int get_process_root(pid_t pid, char **root) {
 								        const char *root_link;
 								        assert(pid >= 0);
 								        root_link = procfs_file_alloca(pid, "root");
 								        return get_process_link_contents(root_link, root);
 								}
-												process-util: make get_process_environ() safer

Let's add a size limit, and let's use safe_fgetc().

											
										
										
											2018-12-17 11:23:15 +01:00
+								#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								/* Reads the "environ" procfs file of the specified process byte by byte and stores an escaped copy in
 								 * *env: every NUL byte is replaced by a newline, every other byte is passed through cescape_char().
 								 *
 								 * Returns 0 on success, -ESRCH if opening the file fails with -ENOENT, -ENOBUFS once the escaped
 								 * output has reached ENVIRONMENT_BLOCK_MAX bytes, -ENOMEM if the buffer cannot be grown, and
 								 * otherwise the negative error of fopen_unlocked() or safe_fgetc(). */
 								int get_process_environ(pid_t pid, char **env) {
 								        _cleanup_fclose_ FILE *f = NULL;
 								        _cleanup_free_ char *escaped = NULL;
 								        size_t n_allocated = 0, n_used = 0;
 								        const char *environ_path;
 								        int r;
 								        assert(pid >= 0);
 								        assert(env);
 								        environ_path = procfs_file_alloca(pid, "environ");
 								        r = fopen_unlocked(environ_path, "re", &f);
 								        if (r < 0)
 								                return r == -ENOENT ? -ESRCH : r;
 								        for (;;) {
 								                char c;
 								                if (n_used >= ENVIRONMENT_BLOCK_MAX)
 								                        return -ENOBUFS;
 								                /* Ask for five more bytes than used before every read, so that both the escaped form
 								                 * of the next byte and the trailing NUL written at EOF have space reserved */
 								                if (!GREEDY_REALLOC(escaped, n_allocated, n_used + 5))
 								                        return -ENOMEM;
 								                r = safe_fgetc(f, &c);
 								                if (r < 0)
 								                        return r;
 								                if (r == 0) {
 								                        /* End of file: terminate the string and hand it over to the caller */
 								                        escaped[n_used] = '\0';
 								                        *env = TAKE_PTR(escaped);
 								                        return 0;
 								                }
 								                if (c != '\0')
 								                        n_used += cescape_char(c, escaped + n_used);
 								                else
 								                        escaped[n_used++] = '\n';
 								        }
 								}
-												process-util: rename get_parent_of_pid() → get_process_ppid()

In order to match the other get_process_xyz() calls.

											
										
										
											2015-10-27 14:01:48 +01:00
+								/* Stores the parent PID of the specified process in *_ppid. For PID 0 and for our own PID this is
 								 * getppid(); for any other PID the value is parsed from the process' "stat" procfs file.
 								 *
 								 * Returns 0 on success, -ESRCH if reading the file fails with -ENOENT, -EIO if the line cannot be
 								 * parsed, -ERANGE if the parsed number does not survive a conversion to pid_t, and otherwise the
 								 * negative error of read_one_line_file(). */
 								int get_process_ppid(pid_t pid, pid_t *_ppid) {
 								        _cleanup_free_ char *stat_line = NULL;
 								        const char *stat_path, *rest;
 								        long unsigned parsed;
 								        int r;
 								        assert(pid >= 0);
 								        assert(_ppid);
 								        /* When we are checking our own data, we can optimize things a bit */
 								        if (pid == 0 || pid == getpid_cached()) {
 								                *_ppid = getppid();
 								                return 0;
 								        }
 								        stat_path = procfs_file_alloca(pid, "stat");
 								        r = read_one_line_file(stat_path, &stat_line);
 								        if (r < 0)
 								                return r == -ENOENT ? -ESRCH : r;
 								        /* Let's skip the pid and comm fields. The latter is enclosed
 								         * in () but does not escape any () in its value, so let's
 								         * skip over it manually, by looking for the last ')' */
 								        rest = strrchr(stat_line, ')');
 								        if (!rest)
 								                return -EIO;
 								        if (sscanf(rest + 1, " "
 								                   "%*c "  /* state */
 								                   "%lu ", /* ppid */
 								                   &parsed) != 1)
 								                return -EIO;
 								        /* Refuse values that change when squeezed through pid_t */
 								        if ((long unsigned) (pid_t) parsed != parsed)
 								                return -ERANGE;
 								        *_ppid = (pid_t) parsed;
 								        return 0;
 								}
-												pid1: by default make user units inherit their umask from the user manager

This patch changes the way user managers set the default umask for the units it
manages.

Indeed one can expect that if user manager's umask is redefined through PAM
(via /etc/login.defs or pam_umask), all its children including the units it
spawns have their umask set to the new value.

Hence make user units inherit their umask value from their parent instead of
the hard coded value 0022 but allow them to override this value via their unit
file.

Note that reexecuting managers with 'systemctl daemon-reexec' after changing
UMask= has no effect. To take effect managers need to be restarted with
'systemct restart' instead. This behavior was already present before this
patch.

Fixes #6077.

											
										
										
											2020-04-03 10:00:25 +02:00
+								/* Looks up the "Umask" field in the "status" procfs file of the specified process and parses it with
 								 * parse_mode() into *umask.
 								 *
 								 * Returns the result of parse_mode() if the field could be read, -ESRCH if get_proc_field() fails
 								 * with -ENOENT, and any other negative error of get_proc_field() unchanged. */
 								int get_process_umask(pid_t pid, mode_t *umask) {
 								        _cleanup_free_ char *m = NULL;
 								        const char *p;
 								        int r;
 								        assert(umask);
 								        assert(pid >= 0);
 								        p = procfs_file_alloca(pid, "status");
 								        r = get_proc_field(p, "Umask", WHITESPACE, &m);
 								        if (r == -ENOENT)
 								                return -ESRCH;
 								        /* On any other failure 'm' was not filled in, hence propagate the error instead of handing a
 								         * NULL string to parse_mode() */
 								        if (r < 0)
 								                return r;
 								        return parse_mode(m, umask);
 								}
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								/* Waits with waitid(P_PID, ..., WEXITED) for the specified process, restarting the call whenever it
 								 * is interrupted (EINTR). If 'status' is non-NULL the siginfo_t filled in by waitid() is left in it;
 								 * it is zeroed before every attempt. Returns 0 on success, or a negative errno-style error if
 								 * waitid() fails for any reason other than EINTR. */
 								int wait_for_terminate(pid_t pid, siginfo_t *status) {
 								        siginfo_t dummy;
 								        assert(pid >= 1);
 								        /* The caller isn't interested in the result? Then use a local buffer */
 								        if (!status)
 								                status = &dummy;
 								        for (;;) {
 								                zero(*status);
 								                if (waitid(P_PID, pid, status, WEXITED) >= 0)
 								                        return 0;
 								                if (errno != EINTR)
 								                        return negative_errno();
 								        }
 								}
 								/*
 								 * Return values:
 								 * < 0 : wait_for_terminate() failed to get the state of the
 								 *       process, the process was terminated by a signal, or
 								 *       failed for an unknown reason.
 								 * >=0 : The process terminated normally, and its exit code is
 								 *       returned.
 								 *
 								 * That is, success is indicated by a return value of zero, and an
 								 * error is indicated by a non-zero value.
 								 *
 								 * A warning is emitted if the process terminates abnormally,
 								 * and also if it returns non-zero unless check_exit_code is true.
 								 */
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
 								        _cleanup_free_ char *buffer = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        siginfo_t status;
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								        int r, prio;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        assert(pid > 1);
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								        if (!name) {
 								                r = get_process_comm(pid, &buffer);
 								                if (r < 0)
 								                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
 								                else
 								                        name = buffer;
 								        }
 								        prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        r = wait_for_terminate(pid, &status);
 								        if (r < 0)
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        if (status.si_code == CLD_EXITED) {
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								                if (status.si_status != EXIT_SUCCESS)
 								                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
 								                                 "%s failed with exit status %i.", strna(name), status.si_status);
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                else
 								                        log_debug("%s succeeded.", name);
 								                return status.si_status;
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
-												tree-wide: use IN_SET where possible

In addition to the changes from #6933 this handles cases that could be
matched with the included cocci file.

											
										
										
											2017-09-29 00:37:23 +02:00
+								        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                return -EPROTO;
 								        }
-												process-util: rework wait_for_terminate_and_warn() to take a flags parameter

This renames wait_for_terminate_and_warn() to
wait_for_terminate_and_check(), and adds a flags parameter, that
controls how much to log: there's one flag that means we log about
abnormal stuff, and another one that controls whether we log about
non-zero exit codes. Finally, there's a shortcut flag value for logging
in both cases, as that's what we usually use.

All callers are accordingly updated. At three occasions duplicate logging
is removed, i.e. where the old function was called but logged in the
caller, too.

											
										
										
											2017-12-28 00:51:19 +01:00
+								        log_full(prio, "%s failed due to unknown reason.", strna(name));
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        return -EPROTO;
 								}
-												core: Implement timeout based umount/remount limit

Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.

In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.

											
										
										
											2017-12-13 18:49:26 +01:00
+								/*
 								 * Return values:
-												process-util: mention that wait_for_terminate_with_timeout() should be called with SIGCHLD blocked

											
										
										
											2018-05-17 04:14:59 +02:00
+								 *
 								 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
 								 *       was terminated by a signal, or failed for an unknown reason.
 								 *
-												core: Implement timeout based umount/remount limit

Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.

In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.

											
										
										
											2017-12-13 18:49:26 +01:00
+								 * >=0 : The process terminated normally with no failures.
 								 *
-												process-util: mention that wait_for_terminate_with_timeout() should be called with SIGCHLD blocked

											
										
										
											2018-05-17 04:14:59 +02:00
+								 * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
 								 * states are indicated by error is indicated by a non-zero value.
 								 *
 								 * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
 								 * to remain entirely race-free.
-												core: Implement timeout based umount/remount limit

Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.

In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.

											
										
										
											2017-12-13 18:49:26 +01:00
+								 */
 								int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
 								        sigset_t mask;
 								        int r;
 								        usec_t until;
 								        assert_se(sigemptyset(&mask) == 0);
 								        assert_se(sigaddset(&mask, SIGCHLD) == 0);
 								        /* Drop into a sigtimewait-based timeout. Waiting for the
 								         * pid to exit. */
 								        until = now(CLOCK_MONOTONIC) + timeout;
 								        for (;;) {
 								                usec_t n;
 								                siginfo_t status = {};
 								                struct timespec ts;
 								                n = now(CLOCK_MONOTONIC);
 								                if (n >= until)
 								                        break;
 								                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
 								                /* Assuming we woke due to the child exiting. */
 								                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
 								                        if (status.si_pid == pid) {
 								                                /* This is the correct child.*/
 								                                if (status.si_code == CLD_EXITED)
 								                                        return (status.si_status == 0) ? 0 : -EPROTO;
 								                                else
 								                                        return -EPROTO;
 								                        }
 								                }
 								                /* Not the child, check for errors and proceed appropriately */
 								                if (r < 0) {
 								                        switch (r) {
 								                        case -EAGAIN:
 								                                /* Timed out, child is likely hung. */
 								                                return -ETIMEDOUT;
 								                        case -EINTR:
 								                                /* Received a different signal and should retry */
 								                                continue;
 								                        default:
 								                                /* Return any unexpected errors */
 								                                return r;
 								                        }
 								                }
 								        }
 								        return -EPROTO;
 								}
-												util: rework sigkill_wait() to not require pid_t pointer

Let's make sigkill_wait() take a normal pid_t, and add sigkill_waitp() that
takes a pointer (which is useful for usage in _cleanup_), following the usual
logic we have for this.

											
										
										
											2016-04-29 19:23:23 +02:00
+								void sigkill_wait(pid_t pid) {
 								        assert(pid > 1);
-												process-util: check for correct kill return value (#10841)

Code was not doing a wait() after kill() due to checking for a return value > 0, and was leaving zombie processes. This affected things like  sd-bus unixexec connections.
											
										
										
											2018-11-20 04:35:36 +01:00
+								        if (kill(pid, SIGKILL) >= 0)
-												util: rework sigkill_wait() to not require pid_t pointer

Let's make sigkill_wait() take a normal pid_t, and add sigkill_waitp() that
takes a pointer (which is useful for usage in _cleanup_), following the usual
logic we have for this.

											
										
										
											2016-04-29 19:23:23 +02:00
+								                (void) wait_for_terminate(pid, NULL);
 								}
 								void sigkill_waitp(pid_t *pid) {
-												util-lib: save/restore errno in cleanup calls

We should be careful with errno in cleanup functions, and not alter it
under any circumstances. In the safe_close cleanup handlers we are
already safe in that regard, but let's add similar protections on other
cleanup handlers that invoke system calls.

Why bother? Cleanup handlers insert code at function return in
non-obvious ways. Hence, code that sets errno and returns should not be
confused by us overrding the errno from a cleanup handler.

This is a paranoia fix only, I am not aware where this actually mattered
in real-life situations.

											
										
										
											2018-01-10 17:21:15 +01:00
+								        PROTECT_ERRNO;
-												process-util: move more process related calls to process-util.[ch]

											
										
										
											2015-10-26 01:13:11 +01:00
+								        if (!pid)
 								                return;
 								        if (*pid <= 1)
 								                return;
-												util: rework sigkill_wait() to not require pid_t pointer

Let's make sigkill_wait() take a normal pid_t, and add sigkill_waitp() that
takes a pointer (which is useful for usage in _cleanup_), following the usual
logic we have for this.

											
										
										
											2016-04-29 19:23:23 +02:00
+								        sigkill_wait(*pid);
-												process-util: move more process related calls to process-util.[ch]

											
										
										
											2015-10-26 01:13:11 +01:00
+								}
-												sd-bus: cleanup ssh sessions (Closes: #8076)

We still invoke ssh unnecessarily when there is incompatible or erroneous input.
The follow-up to finish that would make the code a bit more verbose,
as it would require repeating this bit:
```
        r = bus_connect_transport(arg_transport, arg_host, false, &bus);
        if (r < 0) {
                log_error_errno(r, "Failed to create bus connection: %m");
                goto finish;
        }

        sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
```
in every verb, after parsing.

v2: add waitpid() to avoid a zombie process, switch to SIGTERM from SIGKILL
v3: refactor, wait in bus_start_address()

											
										
										
											2018-02-03 19:16:33 +01:00
+								void sigterm_wait(pid_t pid) {
 								        assert(pid > 1);
-												process-util: check for correct kill return value (#10841)

Code was not doing a wait() after kill() due to checking for a return value > 0, and was leaving zombie processes. This affected things like  sd-bus unixexec connections.
											
										
										
											2018-11-20 04:35:36 +01:00
+								        if (kill_and_sigcont(pid, SIGTERM) >= 0)
-												sd-bus: cleanup ssh sessions (Closes: #8076)

we still invoke ssh unnecessarily when there in incompatible or erreneous input
The fallow-up to finish that would make the code a bit more verbose,
as it would require repeating this bit:
```
        r = bus_connect_transport(arg_transport, arg_host, false, &bus);
        if (r < 0) {
                log_error_errno(r, "Failed to create bus connection: %m");
                goto finish;
        }

        sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
```
in every verb, after parsing.

v2: add waitpid() to avoid a zombie process, switch to SIGTERM from SIGKILL
v3: refactor, wait in bus_start_address()

											
										
										
											2018-02-03 19:16:33 +01:00
+								                (void) wait_for_terminate(pid, NULL);
 								}
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
/* Sends signal 'sig' to 'pid', followed by SIGCONT if that worked.
 *
 * Returns 0 if 'sig' could be sent, -errno otherwise. The result of the additional SIGCONT is ignored. */
int kill_and_sigcont(pid_t pid, int sig) {
        if (kill(pid, sig) < 0)
                return -errno;

        /* The signal went out. Follow up with SIGCONT, unless we already just sent a SIGCONT, or SIGKILL
         * was sent which isn't affected by a process being suspended anyway. */
        if (!IN_SET(sig, SIGCONT, SIGKILL))
                (void) kill(pid, SIGCONT);

        return 0;
}
-												process: shortcut getenv_for_pid() for our own process

											
										
										
											2018-02-14 14:27:31 +01:00
+								int getenv_for_pid(pid_t pid, const char *field, char **ret) {
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        _cleanup_fclose_ FILE *f = NULL;
 								        char *value = NULL;
 								        const char *path;
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								        size_t l, sum = 0;
 								        int r;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
 								        assert(pid >= 0);
 								        assert(field);
-												process: shortcut getenv_for_pid() for our own process

											
										
										
											2018-02-14 14:27:31 +01:00
+								        assert(ret);
 								        if (pid == 0 || pid == getpid_cached()) {
 								                const char *e;
 								                e = getenv(field);
 								                if (!e) {
 								                        *ret = NULL;
 								                        return 0;
 								                }
 								                value = strdup(e);
 								                if (!value)
 								                        return -ENOMEM;
 								                *ret = value;
 								                return 1;
 								        }
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								        if (!pid_is_valid(pid))
 								                return -EINVAL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        path = procfs_file_alloca(pid, "environ");
-												Add fopen_unlocked() wrapper

											
										
										
											2019-04-04 10:17:16 +02:00
+								        r = fopen_unlocked(path, "re", &f);
 								        if (r == -ENOENT)
 								                return -ESRCH;
 								        if (r < 0)
 								                return r;
-												basic: turn off stdio locking for a couple of helper calls

These helper calls are potentially called often, and allocate FILE*
objects internally for a very short period of time, let's turn off
locking for them too.

											
										
										
											2017-12-11 20:01:55 +01:00
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        l = strlen(field);
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								        for (;;) {
 								                _cleanup_free_ char *line = NULL;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								                if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
 								                        return -ENOBUFS;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								                r = read_nul_string(f, LONG_LINE_MAX, &line);
 								                if (r < 0)
 								                        return r;
 								                if (r == 0)  /* EOF */
 								                        break;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								                sum += r;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												basic/process-util: we need to take the shorter of two strings

==30496== Conditional jump or move depends on uninitialised value(s)
==30496==    at 0x489F654: memcmp (vg_replace_strmem.c:1091)
==30496==    by 0x49BF203: getenv_for_pid (process-util.c:678)
==30496==    by 0x4993ACB: detect_container (virt.c:442)
==30496==    by 0x182DFF: test_get_process_comm (test-process-util.c:98)
==30496==    by 0x185847: main (test-process-util.c:368)
==30496==

											
										
										
											2016-11-18 02:57:22 +01:00
+								                if (strneq(line, field, l) && line[l] == '=') {
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                        value = strdup(line + l + 1);
 								                        if (!value)
 								                                return -ENOMEM;
-												process: shortcut getenv_for_pid() for our own process

											
										
										
											2018-02-14 14:27:31 +01:00
+								                        *ret = value;
 								                        return 1;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                }
-												process-util: rework getenv_for_pid() to use read_nul_string()

											
										
										
											2018-12-17 12:17:36 +01:00
+								        }
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
-												process: shortcut getenv_for_pid() for our own process

											
										
										
											2018-02-14 14:27:31 +01:00
+								        *ret = NULL;
 								        return 0;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								}
-												process-util: introduce pid_is_my_child() helper

No functional changes.

											
										
										
											2019-03-18 11:48:34 +01:00
/* Checks whether 'pid' is a direct child of the calling process, by comparing the parent PID reported by
 * get_process_ppid() with our own PID.
 *
 * Returns > 0 if it is, 0 if it is not (always the case for pid <= 1), and the negative error of
 * get_process_ppid() if the parent could not be determined. */
int pid_is_my_child(pid_t pid) {
        pid_t parent;
        int r;

        if (pid <= 1)
                return false;

        r = get_process_ppid(pid, &parent);
        if (r >= 0)
                r = parent == getpid_cached();

        return r;
}
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
bool pid_is_unwaited(pid_t pid) {
        /* Checks whether a PID is still valid at all, including a zombie */

        if (pid < 0)
                return false;

        /* If we or PID 1 would be dead and have been waited for, this code would not be running. The
         * same goes for our own PID when it is specified explicitly. */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        /* Probe with signal 0. Only ESRCH is taken as "no such process"; any other failure of kill()
         * still counts as the PID being valid. */
        return kill(pid, 0) >= 0 || errno != ESRCH;
}
 								bool pid_is_alive(pid_t pid) {
 								        int r;
 								        /* Checks whether a PID is still valid and not a zombie */
-												Revert "tree-wide: use pid_is_valid() at more places"

This reverts commit ee043777be58251e7441b4f04594e9e3792d7fb2.

It broke almost everywhere it touched.  The places that
handn't been converted, were mostly followed by special
handling for the invalid PID `0`.  That explains why they
tested for `pid < 0` instead of `pid <= 0`.

I think that one was the first commit I reviewed, heh.

											
										
										
											2017-10-03 13:05:24 +02:00
+								        if (pid < 0)
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                return false;
-												process-util: make some minor corrections to PID live detection

											
										
										
											2015-10-27 14:02:45 +01:00
+								        if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
 								                return true;
-												process-util: slightly optimize querying of our own process metadata

When we are checking our own data, we can optimize things a bit.

											
										
										
											2017-07-17 23:35:25 +02:00
+								        if (pid == getpid_cached())
 								                return true;
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								        r = get_process_state(pid);
-												tree-wide: use IN_SET macro (#6977)


											
										
										
											2017-10-04 16:01:32 +02:00
+								        if (IN_SET(r, -ESRCH, 'Z'))
-												shared: add process-util.[ch]

											
										
										
											2015-04-10 19:10:00 +02:00
+								                return false;
 								        return true;
 								}
-												util-lib: move is_main_thread() to process-util.[ch]

											
										
										
											2015-10-27 00:02:45 +01:00
-												shutdown: complain if process excluded from killing spree runs of the same rootfs as PID1

											
										
										
											2016-01-24 16:08:36 +01:00
+								int pid_from_same_root_fs(pid_t pid) {
 								        const char *root;
-												Revert "tree-wide: use pid_is_valid() at more places"

This reverts commit ee043777be58251e7441b4f04594e9e3792d7fb2.

It broke almost everywhere it touched.  The places that
hadn't been converted, were mostly followed by special
handling for the invalid PID `0`.  That explains why they
tested for `pid < 0` instead of `pid <= 0`.

I think that one was the first commit I reviewed, heh.

											
										
										
											2017-10-03 13:05:24 +02:00
+								        if (pid < 0)
-												process-util: slightly optimize querying of our own process metadata

When we are checking our own data, we can optimize things a bit.

											
										
										
											2017-07-17 23:35:25 +02:00
+								                return false;
 								        if (pid == 0 || pid == getpid_cached())
 								                return true;
-												shutdown: complain if process excluded from killing spree runs of the same rootfs as PID1

											
										
										
											2016-01-24 16:08:36 +01:00
 								        root = procfs_file_alloca(pid, "root");
-												basic/path-util: allow flags for path_equal_or_files_same

No functional change, just a new parameters and the tests that
AT_SYMLINK_NOFOLLOW works as expected.

											
										
										
											2017-06-17 18:37:16 +02:00
+								        return files_same(root, "/proc/1/root", 0);
-												shutdown: complain if process excluded from killing spree runs of the same rootfs as PID1

											
										
										
											2016-01-24 16:08:36 +01:00
+								}
-												util-lib: move is_main_thread() to process-util.[ch]

											
										
										
											2015-10-27 00:02:45 +01:00
+								bool is_main_thread(void) {
 								        static thread_local int cached = 0;
 								        if (_unlikely_(cached == 0))
-												tree-wide: make use of getpid_cached() wherever we can

This moves pretty much all uses of getpid() over to getpid_cached(). I
didn't specifically check whether the optimization is worth it for each
replacement, but in order to keep things simple and systematic I
switched over everything at once.

											
										
										
											2017-07-20 16:19:18 +02:00
+								                cached = getpid_cached() == gettid() ? 1 : -1;
-												util-lib: move is_main_thread() to process-util.[ch]

											
										
										
											2015-10-27 00:02:45 +01:00
 								        return cached > 0;
 								}
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
-												basic/macros: rename noreturn into _noreturn_ (#8456)

"noreturn" is reserved and can be used in other header files we include:

  [   16s] In file included from /usr/include/gcrypt.h:30:0,
  [   16s]                  from ../src/journal/journal-file.h:26,
  [   16s]                  from ../src/journal/journal-vacuum.c:31:
  [   16s] /usr/include/gpg-error.h:1544:46: error: expected ‘,’ or ‘;’ before ‘)’ token
  [   16s]  void gpgrt_log_bug (const char *fmt, ...)    GPGRT_ATTR_NR_PRINTF(1,2);

Here we include gcrypt.h (which in turn includes gpg-error.h) *after*
"noreturn" was defined in macro.h.
											
										
										
											2018-03-15 06:23:46 +01:00
+								_noreturn_ void freeze(void) {
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
-												core: set all log fds to -1 when freezing (#3314)

Fixes:
-bash-4.3# echo core >/proc/sys/kernel/core_pattern
-bash-4.3# kill -ABRT 1
-bash-4.3# kill -ABRT 1
[   61.373922] systemd[1]: segfault at 7fff1d0a8f48 ip 00007fc9ca91b1c3 sp 00007fff1d0a8f50 error 6 in libc-2.23.so[7fc9ca8ce000+1c0000]
[   61.768017] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000008b
[   61.768017]
...

Recursive ABRT and segfault:
PID 1 - core
TID 1:
...
 #153905 0x00005575fc3f829d log_dispatch
 #153906 0x00005575fc3f8aa3 log_assert
 #153907 0x00005575fc3f8ae9 log_assert_failed
 #153908 0x00005575fc3e7eb1 safe_close
 #153909 0x00005575fc3f6d5e log_close_journal
 #153910 0x00005575fc3f829d log_dispatch
 #153911 0x00005575fc3f85a1 log_internalv
 #153912 0x00005575fc3f86a1 log_internal
 #153913 0x00005575fc31c4c1 crash
 #153914 0x00007fb26f2cf3d0 __restore_rt
 #153915 0x00007fb26f2ced00 pause
 #153916 0x00005575fc403944 freeze
 #153917 0x00005575fc31bf7b freeze_or_reboot
...
											
										
										
											2016-05-21 18:40:34 +02:00
+								        log_close();
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								        /* Make sure nobody waits for us on a socket anymore */
-												Handle or voidify all calls to close_all_fds()

In activate, it is important that we close the fds. In other cases, meh.

											
										
										
											2019-03-15 15:35:49 +01:00
+								        (void) close_all_fds(NULL, 0);
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
 								        sync();
-												process-util: make our freeze() routine do something useful

When we crash we freeze() ourselves (or possibly we reboot the machine if
that is configured). However, calling pause() is a very unhelpful thing to
do. We should at least continue to do what init systems have been doing
since the '70s, and that is reaping zombies. Otherwise zombies start to
accumulate on the system, which is a very bad thing, as that can prevent
the admin from taking manual steps to reboot the machine in a somewhat
graceful manner (e.g. manually stopping services, unmounting data
volumes and calling reboot -f).

Fixes #7783

											
										
										
											2018-01-12 13:05:48 +01:00
+								        /* Let's not freeze right away, but keep reaping zombies. */
 								        for (;;) {
 								                int r;
 								                siginfo_t si = {};
 								                r = waitid(P_ALL, 0, &si, WEXITED);
 								                if (r < 0 && errno != EINTR)
 								                        break;
 								        }
 								        /* waitid() failed with an unexpected error, things are really borked. Freeze now! */
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								        for (;;)
 								                pause();
 								}
 								bool oom_score_adjust_is_valid(int oa) {
 								        /* A value is acceptable only if it lies within the inclusive range
 								         * OOM_SCORE_ADJ_MIN … OOM_SCORE_ADJ_MAX; reject anything outside either bound. */
 								        if (oa < OOM_SCORE_ADJ_MIN)
 								                return false;
 								        if (oa > OOM_SCORE_ADJ_MAX)
 								                return false;
 								        return true;
 								}
 								unsigned long personality_from_string(const char *p) {
-												util-lib: use the architecture ids from architecture.h for personalities

We have these ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								        int architecture;
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
-												util-lib: simplify personality() string matching

											
										
										
											2016-02-22 18:29:05 +01:00
+								        if (!p)
 								                return PERSONALITY_INVALID;
-												util-lib: use the architecture ids from architecture.h for personalities

We have these ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								        /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
 								         * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
 								         * the same register size. */
 								        architecture = architecture_from_string(p);
 								        if (architecture < 0)
 								                return PERSONALITY_INVALID;
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
-												util-lib: simplify personality() string matching

											
										
										
											2016-02-22 18:29:05 +01:00
+								        if (architecture == native_architecture())
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								                return PER_LINUX;
-												util-lib: simplify personality() string matching

											
										
										
											2016-02-22 18:29:05 +01:00
+								#ifdef SECONDARY_ARCHITECTURE
 								        if (architecture == SECONDARY_ARCHITECTURE)
-												util-lib: support various ppc archs in personality logic

											
										
										
											2016-02-22 15:50:35 +01:00
+								                return PER_LINUX32;
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								#endif
 								        return PERSONALITY_INVALID;
 								}
 								const char* personality_to_string(unsigned long p) {
-												util-lib: use the architecture ids from architecture.h for personalities

We have these ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								        int architecture = _ARCHITECTURE_INVALID;
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
 								        if (p == PER_LINUX)
-												util-lib: simplify personality() string matching

											
										
										
											2016-02-22 18:29:05 +01:00
+								                architecture = native_architecture();
 								#ifdef SECONDARY_ARCHITECTURE
-												util-lib: use the architecture ids from architecture.h for personalities

We have these ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								        else if (p == PER_LINUX32)
-												util-lib: simplify personality() string matching

											
										
										
											2016-02-22 18:29:05 +01:00
+								                architecture = SECONDARY_ARCHITECTURE;
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								#endif
-												util-lib: use the architecture ids from architecture.h for personalities

We have these ids, hence let's use them universally.

											
										
										
											2016-02-22 15:39:52 +01:00
+								        if (architecture < 0)
 								                return NULL;
 								        return architecture_to_string(architecture);
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								}
-												util-lib: wrap personality() to fix up broken glibc error handling (#6766)

glibc appears to propagate different errors in different ways, let's fix
this up, so that our own code doesn't get confused by this.

See #6752 + #6737 for details.

Fixes: #6755
											
										
										
											2017-09-08 16:16:29 +02:00
+								int safe_personality(unsigned long p) {
 								        int ret;
 								        /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
 								         * and in others as negative return value containing an errno-like value. Let's work around this: this is a
 								         * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
 								         * the return value indicating the same issue, so that we are definitely on the safe side.
 								         *
 								         * Returns whatever personality(p) returned if that is non-negative. On failure returns a negative
 								         * value: -errno if personality() set errno, or personality()'s own negative return value otherwise
 								         * (in which case errno is set to its negation).
 								         *
 								         * See https://github.com/systemd/systemd/issues/6737 */
 								        /* Clear errno first, so that after the call we can tell whether personality() set it. */
 								        errno = 0;
 								        ret = personality(p);
 								        if (ret < 0) {
 								                /* errno was set by the call: report that as the negative return value. */
 								                if (errno != 0)
 								                        return -errno;
 								                /* errno was left untouched: the negative return value carries the error, mirror it into errno. */
 								                errno = -ret;
 								        }
 								        return ret;
 								}
-												seccomp: default to something resembling the current personality when locking it

Let's lock the personality to the currently set one, if nothing is
specifically specified. But do so with a grain of salt, and never
default to any exotic personality here, but only PER_LINUX or
PER_LINUX32.

											
										
										
											2017-08-09 20:40:26 +02:00
+								int opinionated_personality(unsigned long *ret) {
 								        int current;
 								        /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
 								         * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
 								         * two most relevant personalities: PER_LINUX and PER_LINUX32. */
-												util-lib: wrap personality() to fix up broken glibc error handling (#6766)

glibc appears to propagate different errors in different ways, let's fix
this up, so that our own code doesn't get confused by this.

See #6752 + #6737 for details.

Fixes: #6755
											
										
										
											2017-09-08 16:16:29 +02:00
+								        current = safe_personality(PERSONALITY_INVALID);
-												seccomp: default to something resembling the current personality when locking it

Let's lock the personality to the currently set one, if nothing is
specifically specified. But do so with a grain of salt, and never
default to any exotic personality here, but only PER_LINUX or
PER_LINUX32.

											
										
										
											2017-08-09 20:40:26 +02:00
+								        if (current < 0)
-												util-lib: wrap personality() to fix up broken glibc error handling (#6766)

glibc appears to propagate different errors in different ways, let's fix
this up, so that our own code doesn't get confused by this.

See #6752 + #6737 for details.

Fixes: #6755
											
										
										
											2017-09-08 16:16:29 +02:00
+								                return current;
-												seccomp: default to something resembling the current personality when locking it

Let's lock the personality to the currently set one, if nothing is
specifically specified. But do so with a grain of salt, and never
default to any exotic personality here, but only PER_LINUX or
PER_LINUX32.

											
										
										
											2017-08-09 20:40:26 +02:00
 								        if (((unsigned long) current & 0xffff) == PER_LINUX32)
 								                *ret = PER_LINUX32;
 								        else
 								                *ret = PER_LINUX;
 								        return 0;
 								}
-												core: add valgrind helper for daemon-reexec

Inspired by https://github.com/systemd/systemd/issues/2187#issuecomment-165587140

											
										
										
											2016-01-19 16:48:45 +01:00
+								void valgrind_summary_hack(void) {
-												build-sys: use #if Y instead of #ifdef Y everywhere

The advantage is that if the name is misspelled, cpp will warn us.

$ git grep -Ee "conf.set\('(HAVE|ENABLE)_" -l|xargs sed -r -i "s/conf.set\('(HAVE|ENABLE)_/conf.set10('\1_/"
$ git grep -Ee '#ifn?def (HAVE|ENABLE)' -l|xargs sed -r -i 's/#ifdef (HAVE|ENABLE)/#if \1/; s/#ifndef (HAVE|ENABLE)/#if ! \1/;'
$ git grep -Ee 'if.*defined\(HAVE' -l|xargs sed -i -r 's/defined\((HAVE_[A-Z0-9_]*)\)/\1/g'
$ git grep -Ee 'if.*defined\(ENABLE' -l|xargs sed -i -r 's/defined\((ENABLE_[A-Z0-9_]*)\)/\1/g'
+ manual changes to meson.build

squash! build-sys: use #if Y instead of #ifdef Y everywhere

v2:
- fix incorrect setting of HAVE_LIBIDN2

											
										
										
											2017-10-03 10:41:51 +02:00
+								#if HAVE_VALGRIND_VALGRIND_H
-												tree-wide: make use of getpid_cached() wherever we can

This moves pretty much all uses of getpid() over to getpid_cached(). I
didn't specifically check whether the optimization is worth it for each
replacement, but in order to keep things simple and systematic I
switched over everything at once.

											
										
										
											2017-07-20 16:19:18 +02:00
+								        if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
-												core: add valgrind helper for daemon-reexec

Inspired by https://github.com/systemd/systemd/issues/2187#issuecomment-165587140

											
										
										
											2016-01-19 16:48:45 +01:00
+								                pid_t pid;
-												util-lib: Add sparc64 support for process creation (#3348)

The current raw_clone function takes two arguments, the cloning flags and
a pointer to the stack for the cloned child. The raw cloning without
passing a "thread main" function does not make sense if a new stack is
specified, as it returns in both the parent and the child, which will fail
in the child as the stack is virgin. All uses of raw_clone indeed pass NULL
for the stack pointer which indicates that both processes should share the
stack address (so you better don't pass CLONE_VM).

This commit refactors the code to not require the caller to pass the stack
address, as NULL is the only sensible option. It also adds the magic code
needed to make raw_clone work on sparc64, which does not return 0 in %o0
for the child, but indicates the child process by setting %o1 to non-zero.
This refactoring is not plain aesthetic, because non-NULL stack addresses
need to get mangled before being passed to the clone syscall (you have to
apply STACK_BIAS), whereas NULL must not be mangled. Implementing the
conditional mangling of the stack address would needlessly complicate the
code.

raw_clone is moved to a separate header, because the burden of including
the assert machinery and sched.h shouldn't be applied to every user of
missing_syscalls.h
											
										
										
											2016-05-30 02:03:51 +02:00
+								                pid = raw_clone(SIGCHLD);
-												core: add valgrind helper for daemon-reexec

Inspired by https://github.com/systemd/systemd/issues/2187#issuecomment-165587140

											
										
										
											2016-01-19 16:48:45 +01:00
+								                if (pid < 0)
 								                        log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
 								                else if (pid == 0)
 								                        exit(EXIT_SUCCESS);
 								                else {
 								                        log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
 								                        (void) wait_for_terminate(pid, NULL);
 								                }
 								        }
 								#endif
 								}
-												tree-wide: use typesafe_qsort()

											
										
										
											2018-09-18 01:39:24 +02:00
+								int pid_compare_func(const pid_t *a, const pid_t *b) {
-												core,systemctl: add bus API to retrieve processes of a unit

This adds a new GetProcesses() bus call to the Unit object which returns an
array consisting of all PIDs, their process names, as well as their full cgroup
paths. This is then used by "systemctl status" to show the per-unit process
tree.

This has the benefit that the client-side no longer needs to access the
cgroupfs directly to show the process tree of a unit. Instead, it now uses this
new API, which means it also works if -H or -M are used correctly, as the
information from the specific host is used, and not the one from the local
system.

Fixes: #2945

											
										
										
											2016-04-20 15:28:28 +02:00
+								        /* Suitable for usage in qsort() */
-												tree-wide: use typesafe_qsort()

											
										
										
											2018-09-18 01:39:24 +02:00
+								        return CMP(*a, *b);
-												core,systemctl: add bus API to retrieve processes of a unit

This adds a new GetProcesses() bus call to the Unit object which returns an
array consisting of all PIDs, their process names, as well as their full cgroup
paths. This is then used by "systemctl status" to show the per-unit process
tree.

This has the benefit that the client-side no longer needs to access the
cgroupfs directly to show the process tree of a unit. Instead, it now uses this
new API, which means it also works if -H or -M are used correctly, as the
information from the specific host is used, and not the one from the local
system.

Fixes: #2945

											
										
										
											2016-04-20 15:28:28 +02:00
+								}
-												core: make IOSchedulingClass= and IOSchedulingPriority= settable for transient units

This patch is a bit more complex than I hoped. In particular the single
IOScheduling= property exposed on the bus is split up into
IOSchedulingClass= and IOSchedulingPriority= (though compat is
retained). Otherwise the asymmetry between setting props and getting
them is a bit too nasty.

Fixes #5613

											
										
										
											2017-06-26 17:40:08 +02:00
+								int ioprio_parse_priority(const char *s, int *ret) {
 								        int parsed, k;
 								        /* Parses the string s as an I/O scheduling priority and stores it in *ret. Returns 0 on success,
 								         * the negative error from safe_atoi() if s is not a valid integer, and -EINVAL if the integer is
 								         * rejected by ioprio_priority_is_valid(). *ret is only written on success. */
 								        assert(s);
 								        assert(ret);
 								        k = safe_atoi(s, &parsed);
 								        if (k < 0)
 								                return k;
 								        if (ioprio_priority_is_valid(parsed)) {
 								                *ret = parsed;
 								                return 0;
 								        }
 								        return -EINVAL;
 								}
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
+								/* The cached PID, possible values:
 								 *
 								 *     == UNSET [0]  → cache not initialized yet
 								 *     == BUSY [-1]  → some thread is initializing it at the moment
 								 *     any other     → the cached PID
 								 *
 								 * getpid_cached() switches the value from UNSET to BUSY with a compare-and-swap before filling in the
 								 * real PID, and reset_cached_pid() puts it back to UNSET.
 								 */
 								/* Marker: nothing cached yet. */
 								#define CACHED_PID_UNSET ((pid_t) 0)
 								/* Marker: an initialization is in progress. */
 								#define CACHED_PID_BUSY ((pid_t) -1)
 								/* The cache itself, starting out as "not initialized". */
 								static pid_t cached_pid = CACHED_PID_UNSET;
-												raw-clone: beef up raw_clone() wrapper a bit

First of all, let's return pid_t, which appears to be the correct type
given that we return PIDs, and it's what fork() uses too.

Most importantly though, flush out our PID cache, so that the call
becomes compatible with our getpid_cached() logic.

											
										
										
											2017-12-29 16:45:04 +01:00
+								void reset_cached_pid(void) {
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
+								        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
 								        cached_pid = CACHED_PID_UNSET;
 								}
 								/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
 								 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
 								 * libpthread, as it is part of glibc anyway. */
-												process-util: do not hide global variable

Suggested by LGTM.

											
										
										
											2018-07-16 20:06:33 +02:00
+								extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
-												tree-wide: use gcc attribute macros where appropriate

We have these macros already, hence use them.

											
										
										
											2018-12-03 13:08:33 +01:00
+								extern void* __dso_handle _weak_;
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
 								pid_t getpid_cached(void) {
-												process-util: don't install atfork() handler more than once

											
										
										
											2018-02-26 20:50:57 +01:00
+								        static bool installed = false;
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
+								        pid_t current_value;
 								        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
 								         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
 								         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
 								         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
 								         *
 								         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
-												Link to the right glibc commit in comment (#6884)

Reported by Marcos Mello.

Fixes #6882.
											
										
										
											2017-09-21 20:54:16 +02:00
+								         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
+								         */
 								        current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
 								        switch (current_value) {
 								        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
 								                pid_t new_pid;
-												process-util: use raw_getpid() in getpid_cache() internally (#8115)

We have the raw_getpid() definition in place anyway, and it's certainly
beneficial to expose the same semantics on pre glibc 2.24 and after it
too, hence always bypass glibc for this, and always cache things on our
side.

Fixes: #8113
											
										
										
											2018-02-07 03:10:09 +01:00
+								                new_pid = raw_getpid();
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
-												process-util: don't install atfork() handler more than once

											
										
										
											2018-02-26 20:50:57 +01:00
+								                if (!installed) {
 								                        /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
 								                         * only half-documented (glibc doesn't document it but LSB does — though only superficially)
 								                         * we'll check for errors only in the most generic fashion possible. */
 								                        if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
 								                                /* OOM? Let's try again later */
 								                                cached_pid = CACHED_PID_UNSET;
 								                                return new_pid;
 								                        }
 								                        installed = true;
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
+								                }
 								                cached_pid = new_pid;
 								                return new_pid;
 								        }
 								        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
-												process-util: use raw_getpid() in getpid_cache() internally (#8115)

We have the raw_getpid() definition in place anyway, and it's certainly
beneficial to expose the same semantics on pre glibc 2.24 and after it
too, hence always bypass glibc for this, and always cache things on our
side.

Fixes: #8113
											
										
										
											2018-02-07 03:10:09 +01:00
+								                return raw_getpid();
-												process-util: add getpid_cached() as a caching wrapper for getpid()

Let's make getpid() fast again.

											
										
										
											2017-07-20 15:46:05 +02:00
 								        default: /* Properly initialized */
 								                return current_value;
 								        }
 								}
-												tree-wide: unify logging of "Must be root" message

Let's unify this in one call, generalizing must_be_root() from
bootctl.c.

											
										
										
											2017-12-11 23:00:57 +01:00
+								int must_be_root(void) {
 								        if (geteuid() == 0)
 								                return 0;
-												coccinelle: make use of SYNTHETIC_ERRNO

Ideally, coccinelle would strip unnecessary braces too. But I do not see any
option in coccinelle for this, so instead, I edited the patch text using
search&replace to remove the braces. Unfortunately this is not fully automatic,
in particular it didn't deal well with if-else-if-else blocks and ifdefs, so
there is an increased likelihood of some bugs in such spots.

I also removed part of the patch that coccinelle generated for udev, where we
return -1 for failure. This should be fixed independently.

											
										
										
											2018-11-20 23:40:44 +01:00
+								        return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");
-												tree-wide: unify logging of "Must be root" message

Let's unify this in one call, generalizing must_be_root() from
bootctl.c.

											
										
										
											2017-12-11 23:00:57 +01:00
+								}
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								static void restore_sigsetp(sigset_t **ssp) {
 								        /* Cleanup helper: if a saved signal mask was registered in *ssp, reinstall it as the current
 								         * mask. Does nothing when *ssp is NULL; a failure of sigprocmask() is deliberately ignored. */
 								        sigset_t *saved = *ssp;
 								        if (!saved)
 								                return;
 								        (void) sigprocmask(SIG_SETMASK, saved, NULL);
 								}
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								int safe_fork_full(
 								                const char *name,
 								                const int except_fds[],
 								                size_t n_except_fds,
 								                ForkFlags flags,
 								                pid_t *ret_pid) {
 								        pid_t original_pid, pid;
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								        sigset_t saved_ss, ss;
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								        _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
-												safe_fork: unblock most signals before waiting for child

This ensures we will receive SIGTSTP if the user presses Ctrl-Z.

We continue blocking SIGCHLD to ensure the child is processed by
wait_for_terminate_and_check.

Fixes: https://github.com/systemd/systemd/issues/9806

											
										
										
											2020-02-10 23:17:02 +01:00
+								        bool block_signals = false, block_all = false;
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								        int prio, r;
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
 								        /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
 								         * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        original_pid = getpid_cached();
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								        if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) {
 								                /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
 								                 * be sure that SIGTERMs are not lost we might send to the child. */
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
-												tree-wide: use assert_se() for signal operations with constants

Continuation of a3ebe5eb620e49f0d24082876cafc7579261e64f:
in other places we sometimes use assert_se(), and sometimes normal error
handling. sigfillset and sigaddset can only fail if mask is NULL (which cannot
happen if we are passing in a reference), or if the signal number is invalid
(which really shouldn't happen when we are using a constant like SIGCHLD. If
SIGCHLD is invalid, we have a bigger problem). So let's simplify things and
always use assert_se() in those cases.

In sigset_add_many() we could conceivably pass an invalid signal, so let's keep
normal error handling here. The caller can do assert_se() around the
sigprocmask_many() call if appropriate.

'>= 0' is used for consistency with the rest of the codebase.

											
										
										
											2018-12-21 09:20:15 +01:00
+								                assert_se(sigfillset(&ss) >= 0);
-												safe_fork: unblock most signals before waiting for child

This ensures we will receive SIGTSTP if the user presses Ctrl-Z.

We continue blocking SIGCHLD to ensure the child is processed by
wait_for_terminate_and_check.

Fixes: https://github.com/systemd/systemd/issues/9806

											
										
										
											2020-02-10 23:17:02 +01:00
+								                block_signals = block_all = true;
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
 								        } else if (flags & FORK_WAIT) {
 								                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
-												tree-wide: use assert_se() for signal operations with constants

Continuation of a3ebe5eb620e49f0d24082876cafc7579261e64f:
in other places we sometimes use assert_se(), and sometimes normal error
handling. sigfillset and sigaddset can only fail if mask is NULL (which cannot
happen if we are passing in a reference), or if the signal number is invalid
(which really shouldn't happen when we are using a constant like SIGCHLD. If
SIGCHLD is invalid, we have a bigger problem). So let's simplify things and
always use assert_se() in those cases.

In sigset_add_many() we could conceivably pass an invalid signal, so let's keep
normal error handling here. The caller can do assert_se() around the
sigprocmask_many() call if appropriate.

'>= 0' is used for consistency with the rest of the codebase.

											
										
										
											2018-12-21 09:20:15 +01:00
+								                assert_se(sigemptyset(&ss) >= 0);
 								                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								                block_signals = true;
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        }
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								        if (block_signals) {
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								                if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
 								                        return log_full_errno(prio, errno, "Failed to set signal mask: %m");
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								                saved_ssp = &saved_ss;
 								        }
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
-												process-util: add new FORK_NEW_MOUNTNS flag to safe_fork()

That way we can move one more code location to use safe_fork()

											
										
										
											2017-12-29 18:52:20 +01:00
+								        if (flags & FORK_NEW_MOUNTNS)
 								                pid = raw_clone(SIGCHLD|CLONE_NEWNS);
 								        else
 								                pid = fork();
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								        if (pid < 0)
 								                return log_full_errno(prio, errno, "Failed to fork: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        if (pid > 0) {
 								                /* We are in the parent process */
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
 								                if (flags & FORK_WAIT) {
-												safe_fork: unblock most signals before waiting for child

This ensures we will receive SIGTSTP if the user presses Ctrl-Z.

We continue blocking SIGCHLD to ensure the child is processed by
wait_for_terminate_and_check.

Fixes: https://github.com/systemd/systemd/issues/9806

											
										
										
											2020-02-10 23:17:02 +01:00
+								                        if (block_all) {
 								                                /* undo everything except SIGCHLD */
 								                                ss = saved_ss;
 								                                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
 								                                (void) sigprocmask(SIG_SETMASK, &ss, NULL);
 								                        }
-												process-spec: add another flag FORK_WAIT to safe_fork()

This new flag will cause safe_fork() to wait for the forked off child
before returning. This allows us to unify a number of cases where we
immediately wait on the forked off child, without running any code in the
parent after the fork, and without direct interest in the precise exit
status of the process, except recognizing EXIT_SUCCESS vs everything
else.

											
										
										
											2017-12-29 18:01:37 +01:00
+								                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
 								                        if (r < 0)
 								                                return r;
 								                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
 								                                return -EPROTO;
 								                }
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                if (ret_pid)
 								                        *ret_pid = pid;
 								                return 1;
 								        }
 								        /* We are in the child process */
-												safe_fork: use a cleanup function to restore signal mask in parent

											
										
										
											2020-02-11 17:17:49 +01:00
+								        /* Restore signal mask manually */
 								        saved_ssp = NULL;
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        if (flags & FORK_REOPEN_LOG) {
 								                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
 								                log_close();
 								                log_set_open_when_needed(true);
 								        }
 								        if (name) {
 								                r = rename_process(name);
 								                if (r < 0)
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
 								                                       r, "Failed to rename process, ignoring: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        }
-												pager: set PR_DEATHSIG for pager to SIGINT rather than SIGTERM

"less" doesn't properly reset its terminal on SIGTERM, it does so only
on SIGINT. Let's thus configure SIGINT instead of SIGTERM.

I think this is something less should fix too, and clean up things
correctly on SIGTERM, too. However, given that we explicitly enable
SIGINT behaviour by passing "K" to $LESS I figure it makes sense if we
also send SIGINT instead of SIGTERM to match it.

Fixes: #16084

											
										
										
											2020-06-09 08:59:33 +02:00
+								        if (flags & (FORK_DEATHSIG|FORK_DEATHSIG_SIGINT))
 								                if (prctl(PR_SET_PDEATHSIG, (flags & FORK_DEATHSIG_SIGINT) ? SIGINT : SIGTERM) < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, errno, "Failed to set death signal: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
 								        if (flags & FORK_RESET_SIGNALS) {
 								                r = reset_all_signal_handlers();
 								                if (r < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
 								                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
 								                r = reset_signal_mask();
 								                if (r < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
 								        } else if (block_signals) { /* undo what we did above */
 								                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
 								        }
 								        if (flags & FORK_DEATHSIG) {
-												fix machinectl shell (in machined) (#7785)

4c253ed broke machined
$machinectl shell arch
Failed to get shell PTY: Input/output error

Closes: #7779

v2: do not drop DEATHSIG flag
											
										
										
											2018-01-04 12:37:15 +01:00
+								                pid_t ppid;
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets an "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets PR_SET_PDEATHSIG to SIGTERM in the child, in a safe
   way, so that the parent dying before this happens is handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                /* Let's see if the parent PID is still the one we started from? If not, then the parent
 								                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
-												fix machinectl shell (in machined) (#7785)

4c253ed broke machined
$machinectl shell arch
Failed to get shell PTY: Input/output error

Closes: #7779

v2: do not drop DEATHSIG flag
											
										
										
											2018-01-04 12:37:15 +01:00
+								                ppid = getppid();
 								                if (ppid == 0)
-												all: fix minor typos

[thaller@redhat.com: original patch by Yuri, extracted from [1]]

[1] https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/565

											
										
										
											2020-07-04 10:37:01 +02:00
+								                        /* Parent is in a different PID namespace. */;
-												fix machinectl shell (in machined) (#7785)

4c253ed broke machined
$machinectl shell arch
Failed to get shell PTY: Input/output error

Closes: #7779

v2: do not drop DEATHSIG flag
											
										
										
											2018-01-04 12:37:15 +01:00
+								                else if (ppid != original_pid) {
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        log_debug("Parent died early, raising SIGTERM.");
 								                        (void) raise(SIGTERM);
 								                        _exit(EXIT_FAILURE);
 								                }
 								        }
-												Add macro for checking if some flags are set

This way we don't need to repeat the argument twice.
I didn't replace all instances. I think it's better to leave out:
- asserts
- comparisons like x & y == x, which are mathematically equivalent, but
  here we aren't checking if flags are set, but if the argument fits in the
  flags.

											
										
										
											2018-04-20 15:36:20 +02:00
+								        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
-												process-util: add a new FORK_MOUNTNS_SLAVE flag for safe_fork()

We already have a flag for creating a new mount namespace for the child.
Let's add an extension to that: a new FORK_MOUNTNS_SLAVE flag. When
used in combination, it will mark all mounts in the child namespace as
MS_SLAVE so that the child can freely mount or unmount stuff but it
won't leak into the parent.

											
										
										
											2018-03-23 20:52:46 +01:00
 								                /* Optionally, make sure we never propagate mounts to the host. */
 								                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
 								                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
 								        }
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        if (flags & FORK_CLOSE_ALL_FDS) {
 								                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
 								                log_close();
 								                r = close_all_fds(except_fds, n_except_fds);
 								                if (r < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
 								        }
 								        /* When we were asked to reopen the logs, do so again now */
 								        if (flags & FORK_REOPEN_LOG) {
 								                log_open();
 								                log_set_open_when_needed(false);
 								        }
 								        if (flags & FORK_NULL_STDIO) {
 								                r = make_null_stdio();
 								                if (r < 0) {
-												process-util: add another fork_safe() flag for enabling LOG_ERR/LOG_WARN logging

											
										
										
											2017-12-27 21:49:19 +01:00
+								                        log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								                        _exit(EXIT_FAILURE);
 								                }
-												process-util: add new safe_fork() flag for connecting stdout to stderr

This adds a new safe_fork() flag. If set the child process' fd 1 becomes
fd 2 of the caller. This is useful for invoking tools (such as various
mkfs/fsck implementations) that output status messages to stdout, but
which we invoke and don't want to pollute stdout with their output.

											
										
										
											2019-05-06 22:38:43 +02:00
 								        } else if (flags & FORK_STDOUT_TO_STDERR) {
 								                if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
-												Fix use of uninitialized variable in error path

CID 1408478.

											
										
										
											2019-12-04 14:51:33 +01:00
+								                        log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
-												process-util: add new safe_fork() flag for connecting stdout to stderr

This adds a new safe_fork() flag. If set the child process' fd 1 becomes
fd 2 of the caller. This is useful for invoking tools (such as various
mkfs/fsck implementations) that output status messages to stdout, but
which we invoke and don't want to pollute stdout with their output.

											
										
										
											2019-05-06 22:38:43 +02:00
+								                        _exit(EXIT_FAILURE);
 								                }
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        }
-												process-util: add new FORK_RLIMIT_NOFILE_SAFE flag for safe_fork()

The new flag simply means rlimit_nofile_safe() is called in the child
after all fds are rearranged.

											
										
										
											2018-11-26 15:59:17 +01:00
+								        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
 								                r = rlimit_nofile_safe();
 								                if (r < 0) {
 								                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
 								        }
-												tree-wide: introduce new safe_fork() helper and port everything over

This adds a new safe_fork() wrapper around fork() and makes use of it
everywhere. The new wrapper does a couple of things we previously did
manually and separately in a safer, more correct and automatic way:

1. Optionally resets signal handlers/mask in the child

2. Sets a name on all processes we fork off right after forking off (and
   the patch assigns useful names for all processes we fork off now,
   following a systematic naming scheme: always enclosed in () – in order
   to indicate that these are not proper, exec()ed processes, but only
   forked off children, and if the process is long-running with only our
   own code, without execve()'ing something else, it gets am "sd-" prefix.)

3. Optionally closes all file descriptors in the child

4. Optionally sets a PR_SET_DEATHSIG to SIGTERM in the child, in a safe
   way so that the parent dying before this happens being handled
   safely.

5. Optionally reopens the logs

6. Optionally connects stdin/stdout/stderr to /dev/null

7. Debug logs about the forked off processes.

											
										
										
											2017-12-22 13:08:14 +01:00
+								        if (ret_pid)
 								                *ret_pid = getpid_cached();
 								        return 0;
 								}
-												core: add namespace_fork() helper, that forks, joins a set of namespaces and forks again

This helper is useful to ensure pidns/userns joining is properly
executed (as that requires a fork after the setns()). This is
particularly important when it comes to /proc/self/ access or
SCM_CREDENTIALS, but is generally the safer mode of operation.

											
										
										
											2018-11-12 23:37:13 +01:00
+								int namespace_fork(
 								                const char *outer_name,
 								                const char *inner_name,
 								                const int except_fds[],
 								                size_t n_except_fds,
 								                ForkFlags flags,
 								                int pidns_fd,
 								                int mntns_fd,
 								                int netns_fd,
 								                int userns_fd,
 								                int root_fd,
 								                pid_t *ret_pid) {
 								        int r;
 								        /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
 								         * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
 								         * /proc/self/fd works correctly. */
 								        r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
 								        if (r < 0)
 								                return r;
 								        if (r == 0) {
 								                pid_t pid;
 								                /* Child */
 								                r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
 								                if (r < 0) {
 								                        log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
 								                /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
 								                r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid);
 								                if (r < 0)
 								                        _exit(EXIT_FAILURE);
 								                if (r == 0) {
 								                        /* Child */
 								                        if (ret_pid)
 								                                *ret_pid = pid;
 								                        return 0;
 								                }
 								                r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
 								                if (r < 0)
 								                        _exit(EXIT_FAILURE);
 								                _exit(r);
 								        }
 								        return 1;
 								}
-												tree-wide: be more careful with the type of array sizes

Previously we were a bit sloppy with the index and size types of arrays,
we'd regularly use unsigned. While I don't think this ever resulted in
real issues I think we should be more careful there and follow a
stricter regime: unless there's a strong reason not to use size_t for
array sizes and indexes, size_t it should be. Any allocations we do
ultimately will use size_t anyway, and converting forth and back between
unsigned and size_t will always be a source of problems.

Note that on 32bit machines "unsigned" and "size_t" are equivalent, and
on 64bit machines our arrays shouldn't grow that large anyway, and if
they do we have a problem, however that kind of overly large allocation
we have protections for usually, but for overflows we do not have that
so much, hence let's add it.

So yeah, it's a story of the current code being already "good enough",
but I think some extra type hygiene is better.

This patch tries to be comprehensive, but it probably isn't and I missed
a few cases. But I guess we can cover that later as we notice it. Among
smaller fixes, this changes:

1. strv_length()' return type becomes size_t

2. the unit file changes array size becomes size_t

3. DNS answer and query array sizes become size_t

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76745

											
										
										
											2018-04-27 14:09:31 +02:00
+								int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
-												process-util: move fork_agent() to process-util.[ch]

It's a relatively small wrapper around safe_fork() now, hence let's move
it over, and make its signature even more alike. Also, set a different
process name for the polkit and askpw agents.

											
										
										
											2017-12-22 15:07:22 +01:00
+								        bool stdout_is_tty, stderr_is_tty;
-												tree-wide: be more careful with the type of array sizes

Previously we were a bit sloppy with the index and size types of arrays,
we'd regularly use unsigned. While I don't think this ever resulted in
real issues I think we should be more careful there and follow a
stricter regime: unless there's a strong reason not to use size_t for
array sizes and indexes, size_t it should be. Any allocations we do
ultimately will use size_t anyway, and converting forth and back between
unsigned and size_t will always be a source of problems.

Note that on 32bit machines "unsigned" and "size_t" are equivalent, and
on 64bit machines our arrays shouldn't grow that large anyway, and if
they do we have a problem, however that kind of overly large allocation
we have protections for usually, but for overflows we do not have that
so much, hence let's add it.

So yeah, it's a story of the current code being already "good enough",
but I think some extra type hygiene is better.

This patch tries to be comprehensive, but it probably isn't and I missed
a few cases. But I guess we can cover that later as we notice it. Among
smaller fixes, this changes:

1. strv_length()' return type becomes size_t

2. the unit file changes array size becomes size_t

3. DNS answer and query array sizes become size_t

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76745

											
										
										
											2018-04-27 14:09:31 +02:00
+								        size_t n, i;
-												process-util: move fork_agent() to process-util.[ch]

It's a relatively small wrapper around safe_fork() now, hence let's move
it over, and make its signature even more alike. Also, set a different
process name for the polkit and askpw agents.

											
										
										
											2017-12-22 15:07:22 +01:00
+								        va_list ap;
 								        char **l;
 								        int r;
 								        assert(path);
 								        /* Spawns a temporary TTY agent, making sure it goes away when we go away.
 								         *
 								         * Forks via safe_fork_full() and, in the child, executes "path" with the variadic arguments
 								         * as its argument vector. The variadic list must end with a NULL pointer, since that is
 								         * what terminates the counting loop below.
 								         *
 								         * Returns a negative value if forking fails and 0 in the calling process. The child never
 								         * returns: it either becomes the agent via execv() or _exit()s with EXIT_FAILURE. */
 								        r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
 								        if (r < 0)
 								                return r;
 								        if (r > 0)
 								                return 0;
 								        /* In the child: */
 								        stdout_is_tty = isatty(STDOUT_FILENO);
 								        stderr_is_tty = isatty(STDERR_FILENO);
 								        if (!stdout_is_tty || !stderr_is_tty) {
 								                int fd;
 								                /* Detach from stdout/stderr and reopen /dev/tty for them. This is
 								                 * important to ensure that when systemctl is started via popen() or
 								                 * a similar call that expects to read EOF we actually do generate
 								                 * EOF and not delay this indefinitely because we keep an unused
 								                 * copy of stdout/stderr around. */
 								                fd = open("/dev/tty", O_WRONLY);
 								                if (fd < 0) {
 								                        log_error_errno(errno, "Failed to open /dev/tty: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
 								                if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
 								                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
 								                if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
 								                        log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 								                        _exit(EXIT_FAILURE);
 								                }
-												util: add new safe_close_above_stdio() wrapper

At various places we only want to close fds if they are not
stdin/stdout/stderr, i.e. fds 0, 1, 2. Let's add a unified helper call
for that, and port everything over.

											
										
										
											2018-02-26 15:41:38 +01:00
+								                safe_close_above_stdio(fd);
-												process-util: move fork_agent() to process-util.[ch]

It's a relatively small wrapper around safe_fork() now, hence let's move
it over, and make its signature even more alike. Also, set a different
process name for the polkit and askpw agents.

											
										
										
											2017-12-22 15:07:22 +01:00
+								        }
-												tree-wide: invoke rlimit_nofile_safe() before various exec{v,ve,l}() invocations

Whenever we invoke external, foreign code from code that has
RLIMIT_NOFILE's soft limit bumped to high values, revert it to 1024
first. This is a safety precaution for compatibility with programs using
select() which cannot operate with fds > 1024.

This commit adds the call to rlimit_nofile_safe() to all invocations of
exec{v,ve,l}() and friends that either are in code that we know runs
with RLIMIT_NOFILE bumped up (which is PID 1 and all journal code for
starters) or that is part of shared code that might end up there.

The calls are placed as early as we can in processes invoking a flavour
of execve(), but after the last time we do fd manipulations, so that we
can still take benefit of the high fd limits for that.

											
										
										
											2018-11-26 16:06:26 +01:00
+								        (void) rlimit_nofile_safe();
-												process-util: move fork_agent() to process-util.[ch]

It's a relatively small wrapper around safe_fork() now, hence let's move
it over, and make its signature even more alike. Also, set a different
process name for the polkit and askpw agents.

											
										
										
											2017-12-22 15:07:22 +01:00
+								        /* Count arguments */
 								        va_start(ap, path);
 								        for (n = 0; va_arg(ap, char*); n++)
 								                ;
 								        va_end(ap);
 								        /* Allocate strv */
-												tree-wide: use newa() rather than alloca() where we can

											
										
										
											2018-04-27 14:28:35 +02:00
+								        l = newa(char*, n + 1);
-												process-util: move fork_agent() to process-util.[ch]

It's a relatively small wrapper around safe_fork() now, hence let's move
it over, and make its signature even more alike. Also, set a different
process name for the polkit and askpw agents.

											
										
										
											2017-12-22 15:07:22 +01:00
 								        /* Fill in arguments; the loop runs up to and including index n so that the terminating NULL is copied too */
 								        va_start(ap, path);
 								        for (i = 0; i <= n; i++)
 								                l[i] = va_arg(ap, char*);
 								        va_end(ap);
 								        execv(path, l);
 								        /* Only reached if execv() failed */
 								        _exit(EXIT_FAILURE);
 								}
-												process-util: add new helper call for adjusting the OOM score

And let's make use of it in execute.c

											
										
										
											2018-05-07 20:44:41 +02:00
+								int set_oom_score_adjust(int value) {
 								        char t[DECIMAL_STR_MAX(int)];
 								        sprintf(t, "%i", value);
 								        return write_string_file("/proc/self/oom_score_adj", t,
 								                                 WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
 								}
-												process-util: add helper pidfd_get_pid()

It returns the pid_t a pidfd refers to.

											
										
										
											2019-10-30 16:35:48 +01:00
+								int pidfd_get_pid(int fd, pid_t *ret) {
 								        char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 								        _cleanup_free_ char *fdinfo = NULL;
 								        char *p;
 								        int r;
 								        if (fd < 0)
 								                return -EBADF;
 								        xsprintf(path, "/proc/self/fdinfo/%i", fd);
 								        r = read_full_file(path, &fdinfo, NULL);
 								        if (r == -ENOENT) /* if fdinfo doesn't exist we assume the process does not exist */
 								                return -ESRCH;
 								        if (r < 0)
 								                return r;
 								        p = startswith(fdinfo, "Pid:");
 								        if (!p) {
 								                p = strstr(fdinfo, "\nPid:");
 								                if (!p)
 								                        return -ENOTTY; /* not a pidfd? */
 								                p += 5;
 								        }
 								        p += strspn(p, WHITESPACE);
 								        p[strcspn(p, WHITESPACE)] = 0;
 								        return parse_pid(p, ret);
 								}
-												core: in execute, Never fail setting Nice priority

Instead, push to the closest possible Nice priority setting.

Replaces: #11397

											
										
										
											2017-08-01 18:38:05 +02:00
/* Converts an RLIMIT_NICE resource limit value into the nice level it corresponds to, i.e.
 * PRIO_MAX - limit, clamped to the range PRIO_MIN … PRIO_MAX-1. */
static int rlimit_to_nice(rlim_t limit) {
        if (limit <= 1)
                return PRIO_MAX-1; /* i.e. 19 */

        /* Checked before the cast below, so that huge values such as RLIM_INFINITY are never narrowed
         * to int. */
        if (limit >= -PRIO_MIN + PRIO_MAX)
                return PRIO_MIN; /* i.e. -20 */

        return PRIO_MAX - (int) limit;
}
 								/* Applies the requested nice level to the calling process, falling back to the level derived
 								 * from RLIMIT_NICE if the request is refused for lack of privileges.
 								 *
 								 * Returns 1 if the requested level is in effect, 0 if it is not (either a fallback level was set,
 								 * or nothing could be changed), and a negative errno-style value on failure. */
 								int setpriority_closest(int priority) {
 								        struct rlimit nice_rlimit;
 								        int denied, now, best;

 								        /* Try to set requested nice level */
 								        if (setpriority(PRIO_PROCESS, 0, priority) >= 0)
 								                return 1;

 								        /* Only a permission problem is worth working around, everything else is reported as is */
 								        denied = -errno;
 								        if (!ERRNO_IS_PRIVILEGE(denied))
 								                return denied;

 								        /* errno is reset first and checked afterwards, instead of looking at the return value */
 								        errno = 0;
 								        now = getpriority(PRIO_PROCESS, 0);
 								        if (errno != 0)
 								                return -errno;

 								        if (now == priority)
 								                return 1;

 								        /* Hmm, we'd expect that raising the nice level from our status quo would always work. If
 								         * it doesn't, then the whole setpriority() system call is blocked to us, hence let's
 								         * propagate the error right-away */
 								        if (now < priority)
 								                return denied;

 								        if (getrlimit(RLIMIT_NICE, &nice_rlimit) < 0)
 								                return -errno;

 								        best = rlimit_to_nice(nice_rlimit.rlim_cur);

 								        /* We are already less nice than limit allows us */
 								        if (best > now) {
 								                log_debug("Cannot raise nice level, permissions and the resource limit do not allow it.");
 								                return 0;
 								        }

 								        /* Push to the allowed limit */
 								        if (setpriority(PRIO_PROCESS, 0, best) < 0)
 								                return -errno;

 								        log_debug("Cannot set requested nice level (%i), used next best (%i).", priority, best);
 								        return 0;
 								}
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								static const char *const ioprio_class_table[] = {
 								        /* Names of the I/O scheduling classes, indexed by IOPRIO_CLASS_* value */
 								        [IOPRIO_CLASS_NONE] = "none",
 								        [IOPRIO_CLASS_RT] = "realtime",
 								        [IOPRIO_CLASS_BE] = "best-effort",
-												Move cpus_in_affinity_mask() to cpu-set-util.[ch]

It just seems to fit better there and it's always better to have things
in shared/ rather than basic/.

											
										
										
											2019-05-28 21:28:31 +02:00
+								        [IOPRIO_CLASS_IDLE] = "idle",
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								};
-												unit-file: do not allow bogus IOSchedulingClass values

We have only three bits of space, i.e. 8 possible classes. Immediately reject
anything outside of that range. Add the fuzzer test case and an additional
unit test.

oss-fuzz #6908.

											
										
										
											2018-03-16 11:15:58 +01:00
 								/* NOTE(review): presumably generates the ioprio_class string lookup helpers for the table above, with
 								 * IOPRIO_N_CLASSES bounding the values accepted without a table entry; the macro is defined
 								 * elsewhere — confirm. */
+								DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES);
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
 								/* Names of the CLD_* child status codes, indexed by code value */
 								static const char *const sigchld_code_table[] = {
 								        [CLD_EXITED] = "exited",
 								        [CLD_KILLED] = "killed",
 								        [CLD_DUMPED] = "dumped",
 								        [CLD_TRAPPED] = "trapped",
 								        [CLD_STOPPED] = "stopped",
 								        [CLD_CONTINUED] = "continued",
 								};
 								/* NOTE(review): presumably generates the sigchld_code string lookup helpers for the table above; the
 								 * macro is defined elsewhere — confirm. */
 								DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
 								/* Names of the CPU scheduling policies, indexed by SCHED_* value */
 								static const char* const sched_policy_table[] = {
 								        [SCHED_OTHER] = "other",
 								        [SCHED_BATCH] = "batch",
 								        [SCHED_IDLE] = "idle",
 								        [SCHED_FIFO] = "fifo",
-												Move cpus_in_affinity_mask() to cpu-set-util.[ch]

It just seems to fit better there and it's always better to have things
in shared/ rather than basic/.

											
										
										
											2019-05-28 21:28:31 +02:00
+								        [SCHED_RR] = "rr",
-												process-util: move a couple of process-related calls over

											
										
										
											2015-10-27 14:24:58 +01:00
+								};
 								/* NOTE(review): presumably generates the sched_policy string lookup helpers for the table above, with
 								 * INT_MAX bounding the values accepted without a table entry; the macro is defined elsewhere —
 								 * confirm. */
 								DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);