nspawn: make sure images containing an ESP are compatible with userns -U mode

In -U mode we might need to re-chown() all files and directories to
match the UID shift we want for the image. That's problematic on fat
partitions, such as the ESP (and which is generated by mkosi's
--bootable switch), because fat of course knows no UID/GID file
ownership natively.

With this change we take benefit of the uid= and gid= mount options FAT
knows: instead of chown()ing all files and directories we can just
specify the right UID/GID to use at mount time.

This beefs up the image dissection logic in two ways:

1. First of all support for mounting relevant file systems with
   uid=/gid= is added: when a UID is specified during mount it is used for
   all applicable file systems.

2. Secondly, two new mount flags are added:
   DISSECT_IMAGE_MOUNT_ROOT_ONLY and DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY.
   If one is specified the mount routine will either only mount the root
   partition of an image, or all partitions except the root partition.
   This is used by nspawn: first the root partition is mounted, so that
   we can determine the UID shift in use so far, based on ownership of
   the image's root directory. Then, we mount the remaining partitions
   in a second go, this time with the right UID/GID information.
This commit is contained in:
Lennart Poettering 2017-11-28 16:46:26 +01:00
parent bb8ad9eaca
commit 2d3a5a73e0
7 changed files with 77 additions and 23 deletions

View file

@ -673,6 +673,22 @@ bool fstype_can_discard(const char *fstype) {
"xfs");
}
bool fstype_can_uid_gid(const char *fstype) {
/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
* current and future. */
return STR_IN_SET(fstype,
"adfs",
"fat",
"hfs",
"hpfs",
"iso9660",
"msdos",
"ntfs",
"vfat");
}
int repeat_unmount(const char *path, int flags) {
bool done = false;

View file

@ -52,6 +52,7 @@ bool fstype_is_network(const char *fstype);
bool fstype_is_api_vfs(const char *fstype);
bool fstype_is_ro(const char *fsype);
bool fstype_can_discard(const char *fstype);
bool fstype_can_uid_gid(const char *fstype);
const char* mode_to_inaccessible_node(mode_t mode);

View file

@ -1155,7 +1155,7 @@ int setup_namespace(
if (root_image) {
/* A root image is specified, mount it to the right place */
r = dissected_image_mount(dissected_image, root, dissect_image_flags);
r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
if (r < 0)
goto finish;

View file

@ -29,6 +29,7 @@
#include "loop-util.h"
#include "string-util.h"
#include "strv.h"
#include "user-util.h"
#include "util.h"
static enum {
@ -303,7 +304,7 @@ int main(int argc, char *argv[]) {
if (r < 0)
goto finish;
r = dissected_image_mount(m, arg_path, arg_flags);
r = dissected_image_mount(m, arg_path, UID_INVALID, arg_flags);
if (r < 0) {
log_error_errno(r, "Failed to mount image: %m");
goto finish;

View file

@ -2582,7 +2582,13 @@ static int outer_child(
return r;
if (dissected_image) {
r = dissected_image_mount(dissected_image, directory, DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
/* If we are operating on a disk image, then mount its root directory now, but leave out the rest. We
* can read the UID shift from it if we need to. Further down we'll mount the rest, but then with the
* uid shift known. That way we can mount VFAT file systems shifted to the right place right away. This
* makes sure ESP partitions and userns are compatible. */
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
if (r < 0)
return r;
}
@ -2618,6 +2624,14 @@ static int outer_child(
log_info("Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
}
if (dissected_image) {
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
if (r < 0)
return r;
}
if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
/* OK, we don't know yet which cgroup mode to use yet. Let's figure it out, and tell the parent. */

View file

@ -49,6 +49,7 @@
#include "string-util.h"
#include "strv.h"
#include "udev-util.h"
#include "user-util.h"
#include "xattr-util.h"
int probe_filesystem(const char *node, char **ret_fstype) {
@ -686,10 +687,11 @@ static int mount_partition(
DissectedPartition *m,
const char *where,
const char *directory,
uid_t uid_shift,
DissectImageFlags flags) {
const char *p, *options = NULL, *node, *fstype;
_cleanup_free_ char *chased = NULL;
_cleanup_free_ char *chased = NULL, *options = NULL;
const char *p, *node, *fstype;
bool rw;
int r;
@ -720,13 +722,26 @@ static int mount_partition(
/* If requested, turn on discard support. */
if (fstype_can_discard(fstype) &&
((flags & DISSECT_IMAGE_DISCARD) ||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node))))
options = "discard";
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
options = strdup("discard");
if (!options)
return -ENOMEM;
}
if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
_cleanup_free_ char *uid_option = NULL;
if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
return -ENOMEM;
if (!strextend_with_separator(&options, ",", uid_option, NULL))
return -ENOMEM;
}
return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
}
int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) {
int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
int r;
assert(m);
@ -735,15 +750,20 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlag
if (!m->partitions[PARTITION_ROOT].found)
return -ENXIO;
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
if (r < 0)
return r;
}
if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
return 0;
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
if (r < 0)
return r;
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
if (r < 0)
return r;
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
if (r < 0)
return r;
@ -761,7 +781,7 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlag
r = dir_is_empty(p);
if (r > 0) {
r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
if (r < 0)
return r;
}
@ -1254,7 +1274,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
_exit(EXIT_FAILURE);
r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY);
r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
if (r < 0)
_exit(EXIT_FAILURE);

View file

@ -62,15 +62,17 @@ static inline int PARTITION_VERITY_OF(int p) {
}
typedef enum DissectImageFlags {
DISSECT_IMAGE_READ_ONLY = 1,
DISSECT_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on a loop device and file system supports it */
DISSECT_IMAGE_DISCARD = 4, /* Turn on "discard" if file system supports it, on all block devices */
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 8, /* Turn on "discard" also on crypto devices */
DISSECT_IMAGE_READ_ONLY = 1 << 0,
DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
DISSECT_IMAGE_DISCARD |
DISSECT_IMAGE_DISCARD_ON_CRYPTO,
DISSECT_IMAGE_GPT_ONLY = 16, /* Only recognize images with GPT partition tables */
DISSECT_IMAGE_REQUIRE_ROOT = 32, /* Don't accept disks without root partition */
DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition */
DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root partition */
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only non-root partitions */
} DissectImageFlags;
struct DissectedImage {
@ -94,7 +96,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
int dissected_image_mount(DissectedImage *m, const char *dest, DissectImageFlags flags);
int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
int dissected_image_acquire_metadata(DissectedImage *m);