Merge pull request #12106 from poettering/nosuidns

add "nosuid" flag to exec directory mounts of DynamicUser=1 services
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-03-26 08:58:00 +01:00 committed by GitHub
commit e1af3bc62a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 88 additions and 63 deletions

View file

@ -2367,6 +2367,7 @@ static int compile_bind_mounts(
.source = s,
.destination = d,
.read_only = false,
.nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
.recursive = true,
.ignore_enoent = false,
};

View file

@ -61,6 +61,7 @@ typedef struct MountEntry {
bool ignore:1; /* Ignore if path does not exist? */
bool has_prefix:1; /* Already is prefixed by the root dir? */
bool read_only:1; /* Shall this mount point be read-only? */
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
bool applied:1; /* Already applied */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *source_const; /* The source path, for bind mounts */
@ -308,6 +309,7 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n)
.path_const = b->destination,
.mode = b->recursive ? BIND_MOUNT_RECURSIVE : BIND_MOUNT,
.read_only = b->read_only,
.nosuid = b->nosuid,
.source_const = b->source,
.ignore = b->ignore_enoent,
};
@ -760,27 +762,27 @@ static int mount_private_dev(MountEntry *m) {
goto fail;
}
rmdir(dev);
rmdir(temporary_mount);
(void) rmdir(dev);
(void) rmdir(temporary_mount);
return 0;
fail:
if (devpts)
umount(devpts);
(void) umount(devpts);
if (devshm)
umount(devshm);
(void) umount(devshm);
if (devhugepages)
umount(devhugepages);
(void) umount(devhugepages);
if (devmqueue)
umount(devmqueue);
(void) umount(devmqueue);
umount(dev);
rmdir(dev);
rmdir(temporary_mount);
(void) umount(dev);
(void) rmdir(dev);
(void) rmdir(temporary_mount);
return r;
}
@ -1042,47 +1044,56 @@ static int apply_mount(
return 0;
}
/* Change the per-mount readonly flag on an existing mount */
static int remount_bind_readonly(const char *path, unsigned long orig_flags) {
int r;
/* Change per-mount flags on an existing mount */
static int bind_remount_one(const char *path, unsigned long orig_flags, unsigned long new_flags, unsigned long flags_mask) {
if (mount(NULL, path, NULL, (orig_flags & ~flags_mask) | MS_REMOUNT | MS_BIND | new_flags, NULL) < 0)
return -errno;
r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL);
return r < 0 ? -errno : 0;
return 0;
}
static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) {
unsigned long new_flags = 0, flags_mask = 0;
bool submounts = false;
int r = 0;
assert(m);
assert(proc_self_mountinfo);
if (mount_entry_read_only(m)) {
if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
r = remount_bind_readonly(mount_entry_path(m), m->flags);
} else {
submounts = true;
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
}
} else if (m->mode == PRIVATE_DEV) {
/* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag.
* We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */
r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS);
} else
if (mount_entry_read_only(m) || m->mode == PRIVATE_DEV) {
new_flags |= MS_RDONLY;
flags_mask |= MS_RDONLY;
}
if (m->nosuid) {
new_flags |= MS_NOSUID;
flags_mask |= MS_NOSUID;
}
if (flags_mask == 0) /* No Change? */
return 0;
/* Note that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
* already stays this way. This improves compatibility with container managers, where we won't attempt to undo
* read-only mounts already applied. */
/* We generally apply these changes recursively, except for /dev, and the cases we know there's
* nothing further down. Set /dev readonly, but not submounts like /dev/shm. Also, we only set the
* per-mount read-only flag. We can't set it on the superblock, if we are inside a user namespace
* and running Linux <= 4.17. */
submounts =
mount_entry_read_only(m) &&
!IN_SET(m->mode, EMPTY_DIR, TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, blacklist, proc_self_mountinfo);
else
r = bind_remount_one(mount_entry_path(m), m->flags, new_flags, flags_mask);
/* Note that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked
* read-only already stays this way. This improves compatibility with container managers, where we
* won't attempt to undo read-only mounts already applied. */
if (r == -ENOENT && m->ignore)
r = 0;
return 0;
if (r < 0)
return log_debug_errno(r, "Failed to re-mount '%s'%s read-only: %m", mount_entry_path(m),
return log_debug_errno(r, "Failed to re-mount '%s'%s: %m", mount_entry_path(m),
submounts ? " and its submounts" : "");
return 0;
}
@ -1182,7 +1193,7 @@ int setup_namespace(
_cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
MountEntry *m = NULL, *mounts = NULL;
size_t n_mounts, root_hash_size = 0;
bool require_prefix = false;
const char *root;
@ -1246,7 +1257,10 @@ int setup_namespace(
protect_home, protect_system);
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
m = mounts = new0(MountEntry, n_mounts);
if (!mounts)
return -ENOMEM;
r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix);
if (r < 0)
goto finish;
@ -1291,6 +1305,7 @@ int setup_namespace(
*(m++) = (MountEntry) {
.path_const = "/dev",
.mode = PRIVATE_DEV,
.flags = DEV_MOUNT_OPTIONS,
};
}
@ -1417,7 +1432,7 @@ int setup_namespace(
if (n_mounts > 0) {
_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
char **blacklist;
_cleanup_free_ char **blacklist = NULL;
size_t j;
/* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of /proc.
@ -1463,7 +1478,11 @@ int setup_namespace(
}
/* Create a blacklist we can pass to bind_remount_recursive_with_mountinfo() */
blacklist = newa(char*, n_mounts+1);
blacklist = new(char*, n_mounts+1);
if (!blacklist) {
r = -ENOMEM;
goto finish;
}
for (j = 0; j < n_mounts; j++)
blacklist[j] = (char*) mount_entry_path(mounts+j);
blacklist[j] = NULL;
@ -1497,6 +1516,8 @@ finish:
for (m = mounts; m < mounts + n_mounts; m++)
mount_entry_done(m);
free(mounts);
return r;
}
@ -1539,6 +1560,7 @@ int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) {
.source = TAKE_PTR(s),
.destination = TAKE_PTR(d),
.read_only = item->read_only,
.nosuid = item->nosuid,
.recursive = item->recursive,
.ignore_enoent = item->ignore_enoent,
};

View file

@ -59,6 +59,7 @@ struct BindMount {
char *source;
char *destination;
bool read_only:1;
bool nosuid:1;
bool recursive:1;
bool ignore_enoent:1;
};

View file

@ -731,7 +731,7 @@ static int mount_bind(const char *dest, CustomMount *m) {
return r;
if (m->read_only) {
r = bind_remount_recursive(where, true, NULL);
r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
if (r < 0)
return log_error_errno(r, "Read-only bind mount failed: %m");
}
@ -939,7 +939,7 @@ static int setup_volatile_state(
/* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
r = bind_remount_recursive(directory, true, NULL);
r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
if (r < 0)
return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
@ -1005,7 +1005,7 @@ static int setup_volatile_yes(
bind_mounted = true;
r = bind_remount_recursive(t, true, NULL);
r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
if (r < 0) {
log_error_errno(r, "Failed to remount %s read-only: %m", t);
goto fail;

View file

@ -3324,7 +3324,7 @@ static int outer_child(
return r;
if (arg_read_only && arg_volatile_mode == VOLATILE_NO) {
r = bind_remount_recursive(directory, true, NULL);
r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
if (r < 0)
return log_error_errno(r, "Failed to make tree read-only: %m");
}

View file

@ -29,8 +29,8 @@
#include "strv.h"
int umount_recursive(const char *prefix, int flags) {
bool again;
int n = 0, r;
bool again;
/* Try to umount everything recursively below a
* directory. Also, take care of stacked mounts, and keep
@ -73,9 +73,9 @@ int umount_recursive(const char *prefix, int flags) {
continue;
}
r = cunescape(path, UNESCAPE_RELAX, &p);
if (r < 0)
return r;
k = cunescape(path, UNESCAPE_RELAX, &p);
if (k < 0)
return k;
if (!path_startswith(p, prefix))
continue;
@ -95,7 +95,7 @@ int umount_recursive(const char *prefix, int flags) {
} while (again);
return r ? r : n;
return r < 0 ? r : n;
}
static int get_mount_flags(const char *path, unsigned long *flags) {
@ -107,10 +107,15 @@ static int get_mount_flags(const char *path, unsigned long *flags) {
return 0;
}
/* Use this function only if you do not have direct access to /proc/self/mountinfo
* but the caller can open it for you. This is the case when /proc is
* masked or not mounted. Otherwise, use bind_remount_recursive. */
int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
* for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
int bind_remount_recursive_with_mountinfo(
const char *prefix,
unsigned long new_flags,
unsigned long flags_mask,
char **blacklist,
FILE *proc_self_mountinfo) {
_cleanup_set_free_free_ Set *done = NULL;
_cleanup_free_ char *cleaned = NULL;
int r;
@ -245,16 +250,12 @@ int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **bl
(void) get_mount_flags(cleaned, &orig_flags);
orig_flags &= ~MS_RDONLY;
if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
if (mount(NULL, cleaned, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
return -errno;
log_debug("Made top-level directory %s a mount point.", prefix);
x = strdup(cleaned);
if (!x)
return -ENOMEM;
r = set_consume(done, x);
r = set_put_strdup(done, cleaned);
if (r < 0)
return r;
}
@ -291,7 +292,7 @@ int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **bl
(void) get_mount_flags(x, &orig_flags);
orig_flags &= ~MS_RDONLY;
if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
return -errno;
log_debug("Remounted %s read-only.", x);
@ -299,7 +300,7 @@ int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **bl
}
}
int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist) {
_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
@ -308,7 +309,7 @@ int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
(void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, blacklist, proc_self_mountinfo);
}
int mount_move_root(const char *path) {

View file

@ -8,8 +8,8 @@
int repeat_unmount(const char *path, int flags);
int umount_recursive(const char *target, int flags);
int bind_remount_recursive(const char *prefix, bool ro, char **blacklist);
int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo);
int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist);
int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist, FILE *proc_self_mountinfo);
int mount_move_root(const char *path);

View file

@ -42,7 +42,7 @@ static int make_volatile(const char *path) {
if (r < 0)
goto finish_umount;
r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL);
r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", MS_RDONLY, MS_RDONLY, NULL);
if (r < 0) {
log_error_errno(r, "Failed to remount /usr read-only: %m");
goto finish_umount;