nspawn: make sure images containing an ESP are compatible with userns -U mode
In -U mode we might need to re-chown() all files and directories to match the UID shift we want for the image. That's problematic on FAT partitions, such as the ESP (which is what mkosi's --bootable switch generates), because FAT of course has no native notion of UID/GID file ownership. With this change we take advantage of the uid= and gid= mount options FAT supports: instead of chown()ing all files and directories, we can just specify the right UID/GID to use at mount time. This beefs up the image dissection logic in two ways: 1. First of all, support for mounting relevant file systems with uid=/gid= is added: when a UID is specified during mount, it is used for all applicable file systems. 2. Secondly, two new mount flags are added: DISSECT_IMAGE_MOUNT_ROOT_ONLY and DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY. If one of them is specified, the mount routine will either mount only the root partition of an image, or all partitions except the root partition. This is used by nspawn: first the root partition is mounted, so that we can determine the UID shift in use so far, based on the ownership of the image's root directory. Then we mount the remaining partitions in a second pass, this time with the right UID/GID information.
This commit is contained in:
parent
bb8ad9eaca
commit
2d3a5a73e0
|
@ -673,6 +673,22 @@ bool fstype_can_discard(const char *fstype) {
|
|||
"xfs");
|
||||
}
|
||||
|
||||
/* Tells whether the given file system type is one of the types listed below. Callers use the result to
 * decide whether uid=/gid= mount options may be passed for a file system of that type, so that file
 * ownership is fixed at mount time rather than by chown()ing every file and directory.
 * NOTE(review): STR_IN_SET is defined elsewhere; presumably an exact string comparison against each
 * listed name — confirm. */
bool fstype_can_uid_gid(const char *fstype) {
|
||||
|
||||
/* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
|
||||
* current and future. */
|
||||
|
||||
return STR_IN_SET(fstype,
|
||||
"adfs",
|
||||
"fat",
|
||||
"hfs",
|
||||
"hpfs",
|
||||
"iso9660",
|
||||
"msdos",
|
||||
"ntfs",
|
||||
"vfat");
|
||||
}
|
||||
|
||||
int repeat_unmount(const char *path, int flags) {
|
||||
bool done = false;
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ bool fstype_is_network(const char *fstype);
|
|||
bool fstype_is_api_vfs(const char *fstype);
|
||||
/* NOTE(review): the definition is not visible here; presumably reports whether the named file system type is inherently read-only — confirm against the implementation. */
bool fstype_is_ro(const char *fstype);
|
||||
bool fstype_can_discard(const char *fstype);
|
||||
bool fstype_can_uid_gid(const char *fstype);
|
||||
|
||||
const char* mode_to_inaccessible_node(mode_t mode);
|
||||
|
||||
|
|
|
@ -1155,7 +1155,7 @@ int setup_namespace(
|
|||
|
||||
if (root_image) {
|
||||
/* A root image is specified, mount it to the right place */
|
||||
r = dissected_image_mount(dissected_image, root, dissect_image_flags);
|
||||
r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "loop-util.h"
|
||||
#include "string-util.h"
|
||||
#include "strv.h"
|
||||
#include "user-util.h"
|
||||
#include "util.h"
|
||||
|
||||
static enum {
|
||||
|
@ -303,7 +304,7 @@ int main(int argc, char *argv[]) {
|
|||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
r = dissected_image_mount(m, arg_path, arg_flags);
|
||||
r = dissected_image_mount(m, arg_path, UID_INVALID, arg_flags);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to mount image: %m");
|
||||
goto finish;
|
||||
|
|
|
@ -2582,7 +2582,13 @@ static int outer_child(
|
|||
return r;
|
||||
|
||||
if (dissected_image) {
|
||||
r = dissected_image_mount(dissected_image, directory, DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
|
||||
/* If we are operating on a disk image, then mount its root directory now, but leave out the rest. We
|
||||
* can read the UID shift from it if we need to. Further down we'll mount the rest, but then with the
|
||||
* uid shift known. That way we can mount VFAT file systems shifted to the right place right away. This
|
||||
* makes sure ESP partitions and userns are compatible. */
|
||||
|
||||
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
|
||||
DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
@ -2618,6 +2624,14 @@ static int outer_child(
|
|||
log_info("Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
|
||||
}
|
||||
|
||||
if (dissected_image) {
|
||||
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
|
||||
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
|
||||
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
|
||||
/* OK, we don't know yet which cgroup mode to use yet. Let's figure it out, and tell the parent. */
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include "string-util.h"
|
||||
#include "strv.h"
|
||||
#include "udev-util.h"
|
||||
#include "user-util.h"
|
||||
#include "xattr-util.h"
|
||||
|
||||
int probe_filesystem(const char *node, char **ret_fstype) {
|
||||
|
@ -686,10 +687,11 @@ static int mount_partition(
|
|||
DissectedPartition *m,
|
||||
const char *where,
|
||||
const char *directory,
|
||||
uid_t uid_shift,
|
||||
DissectImageFlags flags) {
|
||||
|
||||
const char *p, *options = NULL, *node, *fstype;
|
||||
_cleanup_free_ char *chased = NULL;
|
||||
_cleanup_free_ char *chased = NULL, *options = NULL;
|
||||
const char *p, *node, *fstype;
|
||||
bool rw;
|
||||
int r;
|
||||
|
||||
|
@ -720,13 +722,26 @@ static int mount_partition(
|
|||
/* If requested, turn on discard support. */
|
||||
if (fstype_can_discard(fstype) &&
|
||||
((flags & DISSECT_IMAGE_DISCARD) ||
|
||||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node))))
|
||||
options = "discard";
|
||||
((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
|
||||
options = strdup("discard");
|
||||
if (!options)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
|
||||
_cleanup_free_ char *uid_option = NULL;
|
||||
|
||||
if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!strextend_with_separator(&options, ",", uid_option, NULL))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
|
||||
}
|
||||
|
||||
int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) {
|
||||
int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
@ -735,15 +750,20 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlag
|
|||
if (!m->partitions[PARTITION_ROOT].found)
|
||||
return -ENXIO;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
|
||||
if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
|
||||
r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if ((flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY))
|
||||
return 0;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
|
||||
r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -761,7 +781,7 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlag
|
|||
|
||||
r = dir_is_empty(p);
|
||||
if (r > 0) {
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
|
||||
r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
@ -1254,7 +1274,7 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
|
|||
if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY);
|
||||
r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
|
||||
if (r < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
|
|
|
@ -62,15 +62,17 @@ static inline int PARTITION_VERITY_OF(int p) {
|
|||
}
|
||||
|
||||
typedef enum DissectImageFlags {
|
||||
DISSECT_IMAGE_READ_ONLY = 1,
|
||||
DISSECT_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on a loop device and file system supports it */
|
||||
DISSECT_IMAGE_DISCARD = 4, /* Turn on "discard" if file system supports it, on all block devices */
|
||||
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 8, /* Turn on "discard" also on crypto devices */
|
||||
DISSECT_IMAGE_READ_ONLY = 1 << 0,
|
||||
DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
|
||||
DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
|
||||
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
|
||||
DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
|
||||
DISSECT_IMAGE_DISCARD |
|
||||
DISSECT_IMAGE_DISCARD_ON_CRYPTO,
|
||||
DISSECT_IMAGE_GPT_ONLY = 16, /* Only recognize images with GPT partition tables */
|
||||
DISSECT_IMAGE_REQUIRE_ROOT = 32, /* Don't accept disks without root partition */
|
||||
DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
|
||||
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition */
|
||||
DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root partition */
|
||||
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only non-root partitions */
|
||||
} DissectImageFlags;
|
||||
|
||||
struct DissectedImage {
|
||||
|
@ -94,7 +96,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
|
|||
|
||||
int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
|
||||
int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
|
||||
int dissected_image_mount(DissectedImage *m, const char *dest, DissectImageFlags flags);
|
||||
int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
|
||||
|
||||
int dissected_image_acquire_metadata(DissectedImage *m);
|
||||
|
||||
|
|
Loading…
Reference in a new issue