Merge pull request #17079 from keszybz/late-exec-resolution

Resolve executable paths before execution, use fexecve()
This commit is contained in:
Lennart Poettering 2020-12-03 14:58:20 +01:00 committed by GitHub
commit 6dd16814a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 214 additions and 71 deletions

View File

@ -216,6 +216,7 @@ conf.set_quoted('SYSTEM_SYSVRCND_PATH', sysvrcnd_path)
conf.set_quoted('RC_LOCAL_PATH', get_option('rc-local'))
conf.set('ANSI_OK_COLOR', 'ANSI_' + get_option('ok-color').underscorify().to_upper())
conf.set10('ENABLE_FEXECVE', get_option('fexecve'))
conf.set_quoted('USER_CONFIG_UNIT_DIR', join_paths(pkgsysconfdir, 'user'))
conf.set_quoted('USER_DATA_UNIT_DIR', userunitdir)
@ -534,6 +535,7 @@ foreach ident : [
#include <signal.h>
#include <sys/wait.h>'''],
['mallinfo', '''#include <malloc.h>'''],
['execveat', '''#include <unistd.h>'''],
['close_range', '''#include <unistd.h>'''],
]
@ -3832,6 +3834,7 @@ foreach tuple : [
['link-timesyncd-shared', get_option('link-timesyncd-shared')],
['kernel-install', get_option('kernel-install')],
['systemd-analyze', conf.get('ENABLE_ANALYZE') == 1],
['fexecve'],
]
if tuple.length() >= 2

View File

@ -375,13 +375,15 @@ option('fuzz-tests', type : 'boolean', value : 'false',
option('install-tests', type : 'boolean', value : 'false',
description : 'install test executables')
option('ok-color', type: 'combo',
option('ok-color', type : 'combo',
choices : ['black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan',
'white', 'highlight-black', 'highlight-red', 'highlight-green',
'highlight-yellow', 'highlight-blue', 'highlight-magenta',
'highlight-cyan', 'highlight-white'],
value : 'green',
description: 'color of the "OK" status message')
option('fexecve', type : 'boolean', value : 'false',
description : 'use fexecve() to spawn children')
option('oss-fuzz', type : 'boolean', value : 'false',
description : 'build against oss-fuzz')

View File

@ -749,6 +749,25 @@ static inline int missing_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *info)
# define rt_sigqueueinfo missing_rt_sigqueueinfo
#endif
/* ======================================================================= */
#if !HAVE_EXECVEAT
static inline int missing_execveat(int dirfd, const char *pathname,
char *const argv[], char *const envp[],
int flags) {
# if defined __NR_execveat && __NR_execveat >= 0
return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
# else
errno = ENOSYS;
return -1;
# endif
}
# undef AT_EMPTY_PATH
# define AT_EMPTY_PATH 0x1000
# define execveat missing_execveat
#endif
/* ======================================================================= */
#define systemd_NR_close_range systemd_SC_arch_bias(436)

View File

@ -585,22 +585,53 @@ char* path_join_internal(const char *first, ...) {
return joined;
}
int find_executable_full(const char *name, bool use_path_envvar, char **ret) {
static int check_x_access(const char *path, int *ret_fd) {
if (ret_fd) {
_cleanup_close_ int fd = -1;
int r;
/* We need to use O_PATH because there may be executables for which we have only exec
* permissions, but not read (usually suid executables). */
fd = open(path, O_PATH|O_CLOEXEC);
if (fd < 0)
return -errno;
r = access_fd(fd, X_OK);
if (r < 0)
return r;
*ret_fd = TAKE_FD(fd);
} else {
/* Let's optimize things a bit by not opening the file if we don't need the fd. */
if (access(path, X_OK) < 0)
return -errno;
}
return 0;
}
int find_executable_full(const char *name, bool use_path_envvar, char **ret_filename, int *ret_fd) {
int last_error, r;
const char *p = NULL;
assert(name);
if (is_path(name)) {
if (access(name, X_OK) < 0)
return -errno;
_cleanup_close_ int fd = -1;
if (ret) {
r = path_make_absolute_cwd(name, ret);
r = check_x_access(name, ret_fd ? &fd : NULL);
if (r < 0)
return r;
if (ret_filename) {
r = path_make_absolute_cwd(name, ret_filename);
if (r < 0)
return r;
}
if (ret_fd)
*ret_fd = TAKE_FD(fd);
return 0;
}
@ -613,8 +644,10 @@ int find_executable_full(const char *name, bool use_path_envvar, char **ret) {
last_error = -ENOENT;
/* Resolve a single-component name to a full path */
for (;;) {
_cleanup_free_ char *j = NULL, *element = NULL;
_cleanup_close_ int fd = -1;
r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS);
if (r < 0)
@ -629,7 +662,8 @@ int find_executable_full(const char *name, bool use_path_envvar, char **ret) {
if (!j)
return -ENOMEM;
if (access(j, X_OK) >= 0) {
r = check_x_access(j, ret_fd ? &fd : NULL);
if (r >= 0) {
_cleanup_free_ char *with_dash;
with_dash = strjoin(j, "/");
@ -643,8 +677,10 @@ int find_executable_full(const char *name, bool use_path_envvar, char **ret) {
/* We can't just `continue` inverting this case, since we need to update last_error. */
if (errno == ENOTDIR) {
/* Found it! */
if (ret)
*ret = path_simplify(TAKE_PTR(j), false);
if (ret_filename)
*ret_filename = path_simplify(TAKE_PTR(j), false);
if (ret_fd)
*ret_fd = TAKE_FD(fd);
return 0;
}

View File

@ -88,9 +88,9 @@ int path_strv_make_absolute_cwd(char **l);
char** path_strv_resolve(char **l, const char *root);
char** path_strv_resolve_uniq(char **l, const char *root);
int find_executable_full(const char *name, bool use_path_envvar, char **ret);
static inline int find_executable(const char *name, char **ret) {
return find_executable_full(name, true, ret);
int find_executable_full(const char *name, bool use_path_envvar, char **ret_filename, int *ret_fd);
static inline int find_executable(const char *name, char **ret_filename) {
return find_executable_full(name, true, ret_filename, NULL);
}
bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update);

View File

@ -1248,8 +1248,8 @@ static int setup_pam(
* termination */
barrier_set_role(&barrier, BARRIER_CHILD);
/* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
* are open here that have been opened by PAM. */
/* Make sure we don't keep open the passed fds in this child. We assume that otherwise only
* those fds are open here that have been opened by PAM. */
(void) close_many(fds, n_fds);
/* Drop privileges - we don't need any to pam_close_session
@ -2893,11 +2893,11 @@ static int setup_credentials(
#if ENABLE_SMACK
static int setup_smack(
const ExecContext *context,
const char *executable) {
int executable_fd) {
int r;
assert(context);
assert(executable);
assert(executable_fd >= 0);
if (context->smack_process_label) {
r = mac_smack_apply_pid(0, context->smack_process_label);
@ -2908,7 +2908,7 @@ static int setup_smack(
else {
_cleanup_free_ char *exec_label = NULL;
r = mac_smack_read(executable, SMACK_ATTR_EXEC, &exec_label);
r = mac_smack_read_fd(executable_fd, SMACK_ATTR_EXEC, &exec_label);
if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
return r;
@ -3419,7 +3419,6 @@ static int close_remaining_fds(
const DynamicCreds *dcreds,
int user_lookup_fd,
int socket_fd,
int exec_fd,
const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
@ -3436,8 +3435,6 @@ static int close_remaining_fds(
if (socket_fd >= 0)
dont_close[n_dont_close++] = socket_fd;
if (exec_fd >= 0)
dont_close[n_dont_close++] = exec_fd;
if (n_fds > 0) {
memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
n_dont_close += n_fds;
@ -3614,6 +3611,35 @@ bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
return c->cpu_affinity_from_numa;
}
static int add_shifted_fd(int *fds, size_t fds_size, size_t *n_fds, int fd, int *ret_fd) {
int r;
assert(fds);
assert(n_fds);
assert(*n_fds < fds_size);
assert(ret_fd);
if (fd < 0) {
*ret_fd = -1;
return 0;
}
if (fd < 3 + (int) *n_fds) {
/* Let's move the fd up, so that it's outside of the fd range we will use to store
* the fds we pass to the process (or which are closed only during execve). */
r = fcntl(fd, F_DUPFD_CLOEXEC, 3 + (int) *n_fds);
if (r < 0)
return -errno;
CLOSE_AND_REPLACE(fd, r);
}
*ret_fd = fds[*n_fds] = fd;
(*n_fds) ++;
return 1;
}
static int exec_child(
Unit *unit,
const ExecCommand *command,
@ -3631,7 +3657,7 @@ static int exec_child(
int *exit_status) {
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
int r, ngids = 0, exec_fd;
_cleanup_free_ gid_t *supplementary_gids = NULL;
const char *username = NULL, *groupname = NULL;
_cleanup_free_ char *home_buffer = NULL;
@ -3658,7 +3684,8 @@ static int exec_child(
gid_t saved_gid = getgid();
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
size_t n_fds;
size_t n_fds = n_socket_fds + n_storage_fds, /* fds to pass to the child */
n_keep_fds; /* total number of fds not to close */
int secure_bits;
_cleanup_free_ gid_t *gids_after_pam = NULL;
int ngids_after_pam = 0;
@ -3702,8 +3729,17 @@ static int exec_child(
/* In case anything used libc syslog(), close this here, too */
closelog();
n_fds = n_socket_fds + n_storage_fds;
r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
int keep_fds[n_fds + 2];
memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
n_keep_fds = n_fds;
r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, params->exec_fd, &exec_fd);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
}
r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
@ -4095,6 +4131,7 @@ static int exec_child(
/* Let's call into PAM after we set up our own idea of resource limits to that pam_limits
* wins here. (See above.) */
/* All fds passed in the fds array will be closed in the pam child process. */
r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
@ -4202,7 +4239,8 @@ static int exec_child(
* shall execute. */
_cleanup_free_ char *executable = NULL;
r = find_executable_full(command->path, false, &executable);
_cleanup_close_ int executable_fd = -1;
r = find_executable_full(command->path, false, &executable, &executable_fd);
if (r < 0) {
if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
log_struct_errno(LOG_INFO, r,
@ -4225,6 +4263,12 @@ static int exec_child(
"EXECUTABLE=%s", command->path);
}
r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, executable_fd, &executable_fd);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
}
#if HAVE_SELINUX
if (needs_sandboxing && use_selinux && params->selinux_context_net && socket_fd >= 0) {
r = mac_selinux_get_child_mls_label(socket_fd, executable, context->selinux_context, &mac_selinux_context_net);
@ -4239,41 +4283,7 @@ static int exec_child(
* more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
* however if we have it as we want to keep it open until the final execve(). */
if (params->exec_fd >= 0) {
exec_fd = params->exec_fd;
if (exec_fd < 3 + (int) n_fds) {
int moved_fd;
/* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
* process we are about to execute. */
moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
if (moved_fd < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
}
CLOSE_AND_REPLACE(exec_fd, moved_fd);
} else {
/* This fd should be FD_CLOEXEC already, but let's make sure. */
r = fd_cloexec(exec_fd, true);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
}
}
fds_with_exec_fd = newa(int, n_fds + 1);
memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
fds_with_exec_fd[n_fds] = exec_fd;
n_fds_with_exec_fd = n_fds + 1;
} else {
fds_with_exec_fd = fds;
n_fds_with_exec_fd = n_fds;
}
r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
r = close_all_fds(keep_fds, n_keep_fds);
if (r >= 0)
r = shift_fds(fds, n_fds);
if (r >= 0)
@ -4307,7 +4317,7 @@ static int exec_child(
/* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
* process. This is the latest place before dropping capabilities. Other MAC context are set later. */
if (use_smack) {
r = setup_smack(context, executable);
r = setup_smack(context, executable_fd);
if (r < 0) {
*exit_status = EXIT_SMACK_PROCESS_LABEL;
return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
@ -4577,8 +4587,7 @@ static int exec_child(
}
}
execve(executable, final_argv, accum_env);
r = -errno;
r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
if (exec_fd >= 0) {
uint8_t hot = 0;

View File

@ -11,11 +11,13 @@
#include "conf-files.h"
#include "env-file.h"
#include "env-util.h"
#include "errno-util.h"
#include "exec-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "hashmap.h"
#include "macro.h"
#include "missing_syscall.h"
#include "process-util.h"
#include "rlimit-util.h"
#include "serialize.h"
@ -33,7 +35,6 @@
assert_cc(EAGAIN == EWOULDBLOCK);
static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) {
pid_t _pid;
int r;
@ -444,3 +445,26 @@ ExecCommandFlags exec_command_flags_from_string(const char *s) {
else
return 1 << idx;
}
int fexecve_or_execve(int executable_fd, const char *executable, char *const argv[], char *const envp[]) {
#if ENABLE_FEXECVE
execveat(executable_fd, "", argv, envp, AT_EMPTY_PATH);
if (IN_SET(errno, ENOSYS, ENOENT) || ERRNO_IS_PRIVILEGE(errno))
/* Old kernel or a script or an overzealous seccomp filter? Let's fall back to execve().
*
* fexecve(3): "If fd refers to a script (i.e., it is an executable text file that names a
* script interpreter with a first line that begins with the characters #!) and the
* close-on-exec flag has been set for fd, then fexecve() fails with the error ENOENT. This
* error occurs because, by the time the script interpreter is executed, fd has already been
* closed because of the close-on-exec flag. Thus, the close-on-exec flag can't be set on fd
* if it refers to a script."
*
* Unfortunately, if we unset close-on-exec, the script will be executed just fine, but (at
* least in case of bash) the script name, $0, will be shown as /dev/fd/nnn, which breaks
* scripts which make use of $0. Thus, let's fall back to execve() in this case.
*/
#endif
execve(executable, argv, envp);
return -errno;
}

View File

@ -45,3 +45,5 @@ extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
const char* exec_command_flags_to_string(ExecCommandFlags i);
ExecCommandFlags exec_command_flags_from_string(const char *s);
int fexecve_or_execve(int executable_fd, const char *executable, char *const argv[], char *const envp[]);

View File

@ -4,10 +4,12 @@
#include <unistd.h>
#include "alloc-util.h"
#include "exec-util.h"
#include "fd-util.h"
#include "macro.h"
#include "mountpoint-util.h"
#include "path-util.h"
#include "process-util.h"
#include "rm-rf.h"
#include "stat-util.h"
#include "string-util.h"
@ -169,12 +171,12 @@ static void test_find_executable_full(void) {
log_info("/* %s */", __func__);
assert_se(find_executable_full("sh", true, &p) == 0);
assert_se(find_executable_full("sh", true, &p, NULL) == 0);
puts(p);
assert_se(streq(basename(p), "sh"));
free(p);
assert_se(find_executable_full("sh", false, &p) == 0);
assert_se(find_executable_full("sh", false, &p, NULL) == 0);
puts(p);
assert_se(streq(basename(p), "sh"));
free(p);
@ -186,12 +188,12 @@ static void test_find_executable_full(void) {
assert_se(unsetenv("PATH") == 0);
assert_se(find_executable_full("sh", true, &p) == 0);
assert_se(find_executable_full("sh", true, &p, NULL) == 0);
puts(p);
assert_se(streq(basename(p), "sh"));
free(p);
assert_se(find_executable_full("sh", false, &p) == 0);
assert_se(find_executable_full("sh", false, &p, NULL) == 0);
puts(p);
assert_se(streq(basename(p), "sh"));
free(p);
@ -236,6 +238,43 @@ static void test_find_executable(const char *self) {
assert_se(find_executable("/proc/filesystems", &p) == -EACCES);
}
static void test_find_executable_exec_one(const char *path) {
_cleanup_free_ char *t = NULL;
_cleanup_close_ int fd = -1;
pid_t pid;
int r;
r = find_executable_full(path, false, &t, &fd);
log_info_errno(r, "%s: %s → %s: %d/%m", __func__, path, t ?: "-", fd);
assert_se(fd > STDERR_FILENO);
assert_se(path_is_absolute(t));
if (path_is_absolute(path))
assert_se(streq(t, path));
pid = fork();
assert_se(pid >= 0);
if (pid == 0) {
r = fexecve_or_execve(fd, t, STRV_MAKE(t, "--version"), STRV_MAKE(NULL));
log_error_errno(r, "[f]execve: %m");
_exit(EXIT_FAILURE);
}
assert_se(wait_for_terminate_and_check(t, pid, WAIT_LOG) == 0);
}
static void test_find_executable_exec(void) {
log_info("/* %s */", __func__);
test_find_executable_exec_one("touch");
test_find_executable_exec_one("/bin/touch");
_cleanup_free_ char *script = NULL;
assert_se(get_testdata_dir("test-path-util/script.sh", &script) >= 0);
test_find_executable_exec_one(script);
}
static void test_prefixes(void) {
static const char* const values[] = {
"/a/b/c/d",
@ -717,6 +756,7 @@ int main(int argc, char **argv) {
test_path_equal_root();
test_find_executable_full();
test_find_executable(argv[0]);
test_find_executable_exec();
test_prefixes();
test_path_join();
test_fsck_exists();

View File

@ -11,6 +11,8 @@ if install_tests
install_dir : testdata_dir)
install_subdir('test-path',
install_dir : testdata_dir)
install_subdir('test-path-util',
install_dir : testdata_dir)
install_subdir('test-umount',
install_dir : testdata_dir)
install_subdir('test-network-generator-conversion',

View File

@ -2,7 +2,7 @@
Description=Test for SystemCallFilter
[Service]
ExecStart=/bin/sh -c 'echo "This should not be seen"'
ExecStart=/bin/sh -c '/bin/echo "This should not be seen"'
Type=oneshot
LimitCORE=0
SystemCallFilter=ioperm

View File

@ -2,7 +2,7 @@
Description=Test for SystemCallFilter
[Service]
ExecStart=/bin/sh -c 'echo "This should not be seen"'
ExecStart=/bin/sh -c '/bin/echo "This should not be seen"'
Type=oneshot
LimitCORE=0
SystemCallFilter=~write open execve exit_group close mmap munmap fstat DONOTEXIST
SystemCallFilter=~write open execve fexecve execveat exit_group close mmap munmap fstat DONOTEXIST

6
test/test-path-util/script.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/sh
echo "$0 $@"
test "$(basename $0)" = "script.sh" || exit 1
test "$1" = "--version" || exit 2
echo "Life is good"