From 9e70a49dc779ca8a082a96eb8eb6b4aec18dd966 Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 1 Dec 2016 18:15:43 +0100
Subject: [PATCH 01/12] libudev: set errno if udev_new() fails

All other constructors in libudev do that, let's also do this for
udev_new().
---
 src/libudev/libudev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/libudev/libudev.c b/src/libudev/libudev.c
index 57ce749e07..d8e13288b0 100644
--- a/src/libudev/libudev.c
+++ b/src/libudev/libudev.c
@@ -97,8 +97,10 @@ _public_ struct udev *udev_new(void) {
         _cleanup_fclose_ FILE *f = NULL;
 
         udev = new0(struct udev, 1);
-        if (udev == NULL)
+        if (!udev) {
+                errno = ENOMEM; /* errno carries positive codes; callers return -errno */
                 return NULL;
+        }
         udev->refcount = 1;
 
         f = fopen("/etc/udev/udev.conf", "re");

From 8c1be37e5b438bfdb640cfd39700bf074c66820c Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 1 Dec 2016 20:25:26 +0100
Subject: [PATCH 02/12] util-lib: split out image dissecting code and loopback
 code from nspawn

This adds two new APIs to systemd:

- loop-util.h is a simple internal API for allocating, setting up and releasing
  loopback block devices.

- dissect-image.h is an internal API for taking apart disk images and figuring
  out what the purpose of each partition is.

Both APIs are basically refactored versions of similar code in nspawn. This
rework should permit us to reuse this in other places than just nspawn in the
future. Specifically: to implement RootImage= in the service image, similar to
RootDirectory=, but operating on a disk image; to unify the gpt-auto-discovery
generator code with the discovery logic in nspawn; to add new API to machined
for determining the OS version of a disk image (i.e. not just running
containers). This PR does not make any such changes however, it just provides
the new reworked API.

The reworked code is also slightly more powerful than the nspawn original one.
When pointing it to an image or block device with a naked file system (i.e.
no partition table) it will simply make it the root device. --- .gitignore | 1 + Makefile.am | 30 +- src/shared/dissect-image.c | 548 ++++++++++++++++++++++++++++++++++ src/shared/dissect-image.h | 66 ++++ src/shared/loop-util.c | 157 ++++++++++ src/shared/loop-util.h | 39 +++ src/test/test-dissect-image.c | 66 ++++ 7 files changed, 902 insertions(+), 5 deletions(-) create mode 100644 src/shared/dissect-image.c create mode 100644 src/shared/dissect-image.h create mode 100644 src/shared/loop-util.c create mode 100644 src/shared/loop-util.h create mode 100644 src/test/test-dissect-image.c diff --git a/.gitignore b/.gitignore index 21fcf9841c..405b07ef4d 100644 --- a/.gitignore +++ b/.gitignore @@ -180,6 +180,7 @@ /test-dhcp-option /test-dhcp-server /test-dhcp6-client +/test-dissect-image /test-dns-domain /test-dns-packet /test-dnssec diff --git a/Makefile.am b/Makefile.am index 124e1867cd..c47a07a1f0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1042,6 +1042,8 @@ libshared_la_SOURCES = \ src/shared/machine-image.h \ src/shared/machine-pool.c \ src/shared/machine-pool.h \ + src/shared/loop-util.c \ + src/shared/loop-util.h \ src/shared/resolve-util.c \ src/shared/resolve-util.h \ src/shared/bus-unit-util.c \ @@ -1053,7 +1055,9 @@ libshared_la_SOURCES = \ src/shared/fdset.c \ src/shared/fdset.h \ src/shared/nsflags.h \ - src/shared/nsflags.c + src/shared/nsflags.c \ + src/shared/dissect-image.c \ + src/shared/dissect-image.h if HAVE_UTMP libshared_la_SOURCES += \ @@ -1076,7 +1080,8 @@ libshared_la_CFLAGS = \ $(AM_CFLAGS) \ $(ACL_CFLAGS) \ $(LIBIDN_CFLAGS) \ - $(SECCOMP_CFLAGS) + $(SECCOMP_CFLAGS) \ + $(BLKID_CFLAGS) libshared_la_LIBADD = \ libsystemd-internal.la \ @@ -1085,7 +1090,8 @@ libshared_la_LIBADD = \ libudev-internal.la \ $(ACL_LIBS) \ $(LIBIDN_LIBS) \ - $(SECCOMP_LIBS) + $(SECCOMP_LIBS) \ + $(BLKID_LIBS) rootlibexec_LTLIBRARIES += \ libsystemd-shared.la @@ -1107,6 +1113,7 @@ libsystemd_shared_la_CFLAGS = \ $(ACL_CFLAGS) \ $(LIBIDN_CFLAGS) \ $(SECCOMP_CFLAGS) 
\ + $(BLKID_CFLAGS) \ -fvisibility=default # We can't use libshared_la_LIBADD here because it would @@ -1118,7 +1125,8 @@ libsystemd_shared_la_LIBADD = \ $(libudev_internal_la_LIBADD) \ $(ACL_LIBS) \ $(LIBIDN_LIBS) \ - $(SECCOMP_LIBS) + $(SECCOMP_LIBS) \ + $(BLKID_LIBS) libsystemd_shared_la_LDFLAGS = \ $(AM_LDFLAGS) \ @@ -1456,7 +1464,8 @@ manual_tests += \ test-btrfs \ test-acd \ test-ipv4ll-manual \ - test-ask-password-api + test-ask-password-api \ + test-dissect-image unsafe_tests = \ test-hostname \ @@ -2067,6 +2076,17 @@ test_ask_password_api_SOURCES = \ test_ask_password_api_LDADD = \ libsystemd-shared.la +test_dissect_image_SOURCES = \ + src/test/test-dissect-image.c + +test_dissect_image_CFLAGS = \ + $(AM_CFLAGS) \ + $(BLKID_CFLAGS) + +test_dissect_image_LDADD = \ + libsystemd-shared.la \ + $(BLKID_LIBS) + test_signal_util_SOURCES = \ src/test/test-signal-util.c diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c new file mode 100644 index 0000000000..7b65daa0eb --- /dev/null +++ b/src/shared/dissect-image.c @@ -0,0 +1,548 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . 
+***/
+
+#include <sys/mount.h>
+
+#include "architecture.h"
+#include "blkid-util.h"
+#include "dissect-image.h"
+#include "gpt.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+
+int dissect_image(int fd, DissectedImage **ret) {
+
+#ifdef HAVE_BLKID
+        _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
+        bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+        _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+        _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
+        _cleanup_udev_unref_ struct udev *udev = NULL;
+        _cleanup_free_ char *generic_node = NULL;
+        const char *pttype = NULL, *usage = NULL;
+        struct udev_list_entry *first, *item;
+        blkid_partlist pl;
+        int r, generic_nr;
+        struct stat st;
+        unsigned i;
+
+        assert(fd >= 0);
+        assert(ret);
+
+        /* Probes a disk image, and returns information about what it found in *ret. Returns -ENOTBLK if fd is not a
+         * block device, -ENOPKG if no suitable partition table or file system could be found, -ENOTUNIQ if more than
+         * one generic root candidate exists, and -ENXIO if no root partition or no kernel partitions showed up. */
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        if (!S_ISBLK(st.st_mode))
+                return -ENOTBLK;
+
+        b = blkid_new_probe();
+        if (!b)
+                return -ENOMEM;
+
+        errno = 0;
+        r = blkid_probe_set_device(b, fd, 0, 0);
+        if (r != 0) {
+                if (errno == 0)
+                        return -ENOMEM;
+
+                return -errno;
+        }
+
+        blkid_probe_enable_superblocks(b, 1);
+        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+        blkid_probe_enable_partitions(b, 1);
+        blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+        errno = 0;
+        r = blkid_do_safeprobe(b);
+        if (r == -2 || r == 1) {
+                log_debug("Failed to identify any partition table.");
+                return -ENOPKG;
+        }
+        if (r != 0) {
+                if (errno == 0)
+                        return -EIO;
+
+                return -errno;
+        }
+
+        m = new0(DissectedImage, 1);
+        if (!m)
+                return -ENOMEM;
+
+        (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+        if (streq_ptr(usage, "filesystem")) {
+                _cleanup_free_ char *t = NULL, *n = NULL;
+                const char *fstype = NULL;
+
+                /* OK, we have found a file system, that's our root partition then. */
+                (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+                if (fstype) {
+                        t = strdup(fstype);
+                        if (!t)
+                                return -ENOMEM;
+                }
+
+                if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
+                        return -ENOMEM;
+
+                m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+                        .found = true,
+                        .rw = true,
+                        .partno = -1,
+                        .architecture = _ARCHITECTURE_INVALID,
+                        .fstype = t,
+                        .node = n,
+                };
+
+                t = n = NULL;
+
+                *ret = m;
+                m = NULL;
+
+                return 0;
+        }
+
+        (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+        if (!pttype)
+                return -ENOPKG;
+
+        is_gpt = streq_ptr(pttype, "gpt");
+        is_mbr = streq_ptr(pttype, "dos");
+
+        if (!is_gpt && !is_mbr)
+                return -ENOPKG;
+
+        errno = 0;
+        pl = blkid_probe_get_partitions(b);
+        if (!pl) {
+                if (errno == 0)
+                        return -ENOMEM;
+
+                return -errno;
+        }
+
+        udev = udev_new();
+        if (!udev)
+                return -errno;
+
+        d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
+        if (!d)
+                return -ENOMEM;
+
+        for (i = 0;; i++) {
+                int n, z;
+
+                if (i >= 10) {
+                        log_debug("Kernel partitions never appeared.");
+                        return -ENXIO;
+                }
+
+                e = udev_enumerate_new(udev);
+                if (!e)
+                        return -errno;
+
+                r = udev_enumerate_add_match_parent(e, d);
+                if (r < 0)
+                        return r;
+
+                r = udev_enumerate_scan_devices(e);
+                if (r < 0)
+                        return r;
+
+                /* Count the partitions enumerated by the kernel */
+                n = 0;
+                first = udev_enumerate_get_list_entry(e);
+                udev_list_entry_foreach(item, first)
+                        n++;
+
+                /* Count the partitions enumerated by blkid */
+                z = blkid_partlist_numof_partitions(pl);
+                if (n == z + 1)
+                        break;
+                if (n > z + 1) {
+                        log_debug("blkid and kernel partition list do not match.");
+                        return -EIO;
+                }
+                if (n < z + 1) {
+                        unsigned j;
+
+                        /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
+                         * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
+                         * synchronous call that waits until probing is complete. */
+
+                        for (j = 0; j < 20; j++) {
+
+                                r = ioctl(fd, BLKRRPART, 0);
+                                if (r < 0)
+                                        r = -errno;
+                                if (r >= 0 || r != -EBUSY)
+                                        break;
+
+                                /* If something else has the device open, such as an udev rule, the ioctl will return
+                                 * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
+                                 * bit, and try again.
+                                 *
+                                 * This is really something they should fix in the kernel! */
+
+                                usleep(50 * USEC_PER_MSEC);
+                        }
+
+                        if (r < 0)
+                                return r;
+                }
+
+                e = udev_enumerate_unref(e);
+        }
+
+        first = udev_enumerate_get_list_entry(e);
+        udev_list_entry_foreach(item, first) {
+                _cleanup_udev_device_unref_ struct udev_device *q;
+                unsigned long long flags;
+                blkid_partition pp;
+                const char *node;
+                dev_t qn;
+                int nr;
+
+                q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
+                if (!q)
+                        return -errno;
+
+                qn = udev_device_get_devnum(q);
+                if (major(qn) == 0)
+                        continue;
+
+                if (st.st_rdev == qn)
+                        continue;
+
+                node = udev_device_get_devnode(q);
+                if (!node)
+                        continue;
+
+                pp = blkid_partlist_devno_to_partition(pl, qn);
+                if (!pp)
+                        continue;
+
+                flags = blkid_partition_get_flags(pp);
+
+                nr = blkid_partition_get_partno(pp);
+                if (nr < 0)
+                        continue;
+
+                if (is_gpt) {
+                        int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
+                        const char *stype, *fstype = NULL;
+                        sd_id128_t type_id;
+                        bool rw = true;
+
+                        if (flags & GPT_FLAG_NO_AUTO)
+                                continue;
+
+                        stype = blkid_partition_get_type_string(pp);
+                        if (!stype)
+                                continue;
+
+                        if (sd_id128_from_string(stype, &type_id) < 0)
+                                continue;
+
+                        if (sd_id128_equal(type_id, GPT_HOME)) {
+                                designator = PARTITION_HOME;
+                                rw = !(flags & GPT_FLAG_READ_ONLY);
+                        } else if (sd_id128_equal(type_id, GPT_SRV)) {
+                                designator = PARTITION_SRV;
+                                rw = !(flags & GPT_FLAG_READ_ONLY);
+                        } else if (sd_id128_equal(type_id, GPT_ESP)) {
+                                designator = PARTITION_ESP;
+                                fstype = "vfat";
+                        }
+#ifdef GPT_ROOT_NATIVE
+                        else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+                                designator = PARTITION_ROOT;
+                                architecture = native_architecture();
+                                rw = !(flags & GPT_FLAG_READ_ONLY);
+                        }
+#endif
+#ifdef GPT_ROOT_SECONDARY
+                        else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+                                designator = PARTITION_ROOT_SECONDARY;
+                                architecture = SECONDARY_ARCHITECTURE;
+                                rw = !(flags & GPT_FLAG_READ_ONLY);
+                        }
+#endif
+                        else if (sd_id128_equal(type_id, GPT_SWAP)) {
+                                designator = PARTITION_SWAP;
+                                fstype = "swap";
+                        } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+                                if (generic_node)
+                                        multiple_generic = true;
+                                else {
+                                        generic_nr = nr;
+                                        generic_rw = !(flags & GPT_FLAG_READ_ONLY);
+                                        generic_node = strdup(node);
+                                        if (!generic_node)
+                                                return -ENOMEM;
+                                }
+                        }
+
+                        if (designator != _PARTITION_DESIGNATOR_INVALID) {
+                                _cleanup_free_ char *t = NULL, *n = NULL;
+
+                                /* First one wins */
+                                if (m->partitions[designator].found)
+                                        continue;
+
+                                if (fstype) {
+                                        t = strdup(fstype);
+                                        if (!t)
+                                                return -ENOMEM;
+                                }
+
+                                n = strdup(node);
+                                if (!n)
+                                        return -ENOMEM;
+
+                                m->partitions[designator] = (DissectedPartition) {
+                                        .found = true,
+                                        .partno = nr,
+                                        .rw = rw,
+                                        .architecture = architecture,
+                                        .node = n,
+                                        .fstype = t,
+                                };
+
+                                n = t = NULL;
+                        }
+
+                } else if (is_mbr) {
+
+                        if (flags != 0x80) /* Bootable flag */
+                                continue;
+
+                        if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
+                                continue;
+
+                        if (generic_node)
+                                multiple_generic = true;
+                        else {
+                                generic_nr = nr;
+                                generic_rw = true;
+                                generic_node = strdup(node);
+                                if (!generic_node)
+                                        return -ENOMEM;
+                        }
+                }
+        }
+
+        if (!m->partitions[PARTITION_ROOT].found) {
+                /* No root partition found? Then let's see if there's one for the secondary architecture. And if not
+                 * either, then check if there's a single generic one, and use that. */
+
+                if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+                        m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+                        zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+                } else if (generic_node) {
+
+                        if (multiple_generic)
+                                return -ENOTUNIQ;
+
+                        m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+                                .found = true,
+                                .rw = generic_rw,
+                                .partno = generic_nr,
+                                .architecture = _ARCHITECTURE_INVALID,
+                                .node = generic_node,
+                        };
+
+                        generic_node = NULL;
+                } else
+                        return -ENXIO;
+        }
+
+        /* Fill in file system types if we don't know them yet. */
+        for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+                const char *fstype = NULL; /* the lookup below is unchecked, so start from a defined value */
+
+                if (!m->partitions[i].found) /* not found? */
+                        continue;
+
+                if (m->partitions[i].fstype) /* already know the type? */
+                        continue;
+
+                if (!m->partitions[i].node) /* have no device node for? */
+                        continue;
+
+                if (b)
+                        blkid_free_probe(b);
+
+                b = blkid_new_probe_from_filename(m->partitions[i].node);
+                if (!b)
+                        return -ENOMEM;
+
+                blkid_probe_enable_superblocks(b, 1);
+                blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+                errno = 0;
+                r = blkid_do_safeprobe(b);
+                if (r == -2 || r == 1) {
+                        log_debug("Failed to identify any partition type on partition %i", m->partitions[i].partno);
+                        continue;
+                }
+                if (r != 0) {
+                        if (errno == 0)
+                                return -EIO;
+
+                        return -errno;
+                }
+
+                (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+                if (fstype) {
+                        char *t;
+
+                        t = strdup(fstype);
+                        if (!t)
+                                return -ENOMEM;
+
+                        m->partitions[i].fstype = t;
+                }
+        }
+
+        *ret = m;
+        m = NULL;
+
+        return 0;
+#else
+        return -EOPNOTSUPP;
+#endif
+}
+
+DissectedImage* dissected_image_unref(DissectedImage *m) {
+        unsigned i;
+
+        if (!m)
+                return NULL;
+
+        for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+                free(m->partitions[i].fstype);
+                free(m->partitions[i].node);
+        }
+
+        free(m);
+        return NULL;
+}
+
+static int mount_partition(DissectedPartition *m, const char *where, const char *directory, DissectedImageMountFlags flags) {
+        const char *p, *options = NULL;
+        bool rw;
+
+        assert(m);
+        assert(where);
+
+        if (!m->found || !m->node || !m->fstype)
+                return 0;
+
+        rw = m->rw && !(flags & DISSECTED_IMAGE_READ_ONLY);
+
+        if (directory)
+                p = strjoina(where, directory);
+        else
+                p = where;
+
+        /* Not supported for now. */
+        if (streq(m->fstype, "crypto_LUKS"))
+                return -EOPNOTSUPP;
+
+        /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains
+         * sparse when possible. */
+        if ((flags & DISSECTED_IMAGE_DISCARD_ON_LOOP) &&
+            STR_IN_SET(m->fstype, "btrfs", "ext4", "vfat", "xfs")) {
+                const char *l;
+
+                l = path_startswith(m->node, "/dev");
+                if (l && startswith(l, "loop"))
+                        options = "discard";
+        }
+
+        return mount_verbose(LOG_DEBUG, m->node, p, m->fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+}
+
+int dissected_image_mount(DissectedImage *m, const char *where, DissectedImageMountFlags flags) {
+        int r;
+
+        assert(m);
+        assert(where);
+
+        if (!m->partitions[PARTITION_ROOT].found)
+                return -ENXIO;
+
+        r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
+        if (r < 0)
+                return r;
+
+        r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
+        if (r < 0)
+                return r;
+
+        r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
+        if (r < 0)
+                return r;
+
+        if (m->partitions[PARTITION_ESP].found) {
+                const char *mp, *x;
+
+                /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
+
+                mp = "/efi";
+                x = strjoina(where, mp);
+                r = dir_is_empty(x);
+                if (r == -ENOENT) {
+                        mp = "/boot";
+                        x = strjoina(where, mp);
+                        r = dir_is_empty(x);
+                }
+                if (r > 0) {
+                        r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        return 0;
+}
+
+static const char *const partition_designator_table[] = {
+        [PARTITION_ROOT] = "root",
+        [PARTITION_ROOT_SECONDARY] = "root-secondary",
+        [PARTITION_HOME] = "home",
+        [PARTITION_SRV] = "srv",
+        [PARTITION_ESP] = "esp",
+        [PARTITION_SWAP] = "swap",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
new file mode 100644
index 0000000000..04b19e8553
--- /dev/null
+++ b/src/shared/dissect-image.h
@@ -0,0 +1,66 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef struct DissectedImage DissectedImage;
+typedef struct DissectedPartition DissectedPartition;
+
+struct DissectedPartition {
+        bool found:1;
+        bool rw:1;
+        int partno;        /* -1 if there was no partition and the image contains a file system directly */
+        int architecture;  /* Intended architecture: either native, secondary or unset (-1). */
+        char *fstype;
+        char *node;
+};
+
+enum  {
+        PARTITION_ROOT,
+        PARTITION_ROOT_SECONDARY,  /* Secondary architecture */
+        PARTITION_HOME,
+        PARTITION_SRV,
+        PARTITION_ESP,
+        PARTITION_SWAP,
+        _PARTITION_DESIGNATOR_MAX,
+        _PARTITION_DESIGNATOR_INVALID = -1
+};
+
+typedef enum DissectedImageMountFlags {
+        DISSECTED_IMAGE_READ_ONLY = 1,
+        DISSECTED_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on loop device and file system supports it */
+} DissectedImageMountFlags;
+
+struct DissectedImage {
+        DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX];
+};
+
+int dissect_image(int fd, DissectedImage **ret);
+
+DissectedImage* dissected_image_unref(DissectedImage *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
+
+int dissected_image_mount(DissectedImage *m, const char *dest, DissectedImageMountFlags flags);
+
+const char* partition_designator_to_string(int i) _const_;
+int partition_designator_from_string(const char *name) _pure_;
diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c
new file mode 100644
index 0000000000..8be4dbf938
--- /dev/null
+++ b/src/shared/loop-util.c
@@ -0,0 +1,157 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <linux/loop.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "loop-util.h"
+/* Returns the fd of the (possibly newly attached loop) block device on success, negative errno on failure. */
+int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
+        const struct loop_info64 info = {
+                .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN|(open_flags == O_RDONLY ? LO_FLAGS_READ_ONLY : 0),
+        };
+
+        _cleanup_close_ int control = -1, loop = -1;
+        _cleanup_free_ char *loopdev = NULL;
+        struct stat st;
+        LoopDevice *d;
+        int nr;
+
+        assert(fd >= 0);
+        assert(ret);
+        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        if (S_ISBLK(st.st_mode)) {
+                _cleanup_close_ int copy = -1; /* closed automatically if we bail out below */
+
+                /* If this is already a block device, store a copy of the fd as it is */
+
+                copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+                if (copy < 0)
+                        return -errno;
+
+                d = new0(LoopDevice, 1);
+                if (!d)
+                        return -ENOMEM;
+
+                *d = (LoopDevice) {
+                        .fd = copy,
+                        .nr = -1,
+                };
+                copy = -1; /* ownership moved into d */
+                *ret = d;
+
+                return d->fd;
+        }
+
+        if (!S_ISREG(st.st_mode))
+                return -EINVAL;
+
+        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+        if (control < 0)
+                return -errno;
+
+        nr = ioctl(control, LOOP_CTL_GET_FREE);
+        if (nr < 0)
+                return -errno;
+
+        if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+                return -ENOMEM;
+
+        loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+        if (loop < 0)
+                return -errno;
+
+        if (ioctl(loop, LOOP_SET_FD, fd) < 0)
+                return -errno;
+
+        if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0)
+                return -errno;
+
+        d = new(LoopDevice, 1);
+        if (!d)
+                return -ENOMEM;
+
+        *d = (LoopDevice) {
+                .fd = loop,
+                .node = loopdev,
+                .nr = nr,
+        };
+
+        loop = -1;
+        loopdev = NULL;
+
+        *ret = d;
+
+        return d->fd;
+}
+
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret) {
+        _cleanup_close_ int fd = -1;
+
+        assert(path);
+        assert(ret);
+        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+        fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+        if (fd < 0)
+                return -errno;
+
+        return loop_device_make(fd, open_flags, ret);
+}
+
+LoopDevice* loop_device_unref(LoopDevice *d) {
+        if (!d)
+                return NULL;
+
+        if (d->fd >= 0) {
+
+                if (d->nr >= 0) {
+                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
+                                log_debug_errno(errno, "Failed to clear loop device: %m");
+
+                }
+
+                safe_close(d->fd);
+        }
+
+        if (d->nr >= 0) {
+                _cleanup_close_ int control = -1;
+
+                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+                if (control < 0)
+                        log_debug_errno(errno, "Failed to open loop control device: %m");
+                else {
+                        if (ioctl(control, LOOP_CTL_REMOVE, d->nr) < 0)
+                                log_debug_errno(errno, "Failed to remove loop device: %m");
+                }
+        }
+
+        free(d->node);
+        free(d);
+
+        return NULL;
+}
diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h
new file mode 100644
index 0000000000..5c847c4ac3
--- /dev/null
+++ b/src/shared/loop-util.h
@@ -0,0 +1,39 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "macro.h"
+
+typedef struct LoopDevice LoopDevice;
+
+/* Some helpers for setting up loopback block devices */
+
+struct LoopDevice {
+        int fd;     /* open fd of the block device; closed by loop_device_unref() */
+        int nr;     /* loop device number, or -1 if fd refers to a block device we did not allocate */
+        char *node; /* "/dev/loopN" path of an allocated loop device, otherwise NULL */
+};
+
+int loop_device_make(int fd, int open_flags, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret);
+
+LoopDevice* loop_device_unref(LoopDevice *d);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
diff --git a/src/test/test-dissect-image.c b/src/test/test-dissect-image.c
new file mode 100644
index 0000000000..0363ef8eb6
--- /dev/null
+++ b/src/test/test-dissect-image.c
@@ -0,0 +1,66 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "dissect-image.h"
+#include "log.h"
+#include "loop-util.h"
+#include "string-util.h"
+/* Manual test: attaches argv[1] (image file or block device), dissects it and prints every partition found. */
+int main(int argc, char *argv[]) {
+        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+        _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+        int r, i;
+
+        log_set_max_level(LOG_DEBUG);
+
+        if (argc < 2) {
+                log_error("Requires one command line argument.");
+                return EXIT_FAILURE;
+        }
+
+        r = loop_device_make_by_path(argv[1], O_RDONLY, &d);
+        if (r < 0) {
+                log_error_errno(r, "Failed to set up loopback device: %m");
+                return EXIT_FAILURE;
+        }
+
+        r = dissect_image(d->fd, &m);
+        if (r < 0) {
+                log_error_errno(r, "Failed to dissect image: %m");
+                return EXIT_FAILURE;
+        }
+
+        for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+
+                if (!m->partitions[i].found)
+                        continue;
+
+                printf("Found %s partition, %s of type %s at #%i (%s)\n",
+                       partition_designator_to_string(i),
+                       m->partitions[i].rw ? "writable" : "read-only",
+                       strna(m->partitions[i].fstype),
+                       m->partitions[i].partno,
+                       strna(m->partitions[i].node));
+        }
+
+        return EXIT_SUCCESS;
+}

From 2d8457851b85813724b1b8b8c7a49c1c4200dd45 Mon Sep 17 00:00:00 2001
From: Lennart Poettering
Date: Thu, 1 Dec 2016 20:26:09 +0100
Subject: [PATCH 03/12] nspawn: port nspawn to new generalized image dissection
 code

Let's make use of the new internal API. This mostly doesn't change anything for
the caller, however, "systemd-nspawn --image=/dev/sda7" works now as the new
code can handle disk images with no partition tables, and make any detected
images directly the root.
--- src/nspawn/nspawn.c | 694 +++----------------------------------------- 1 file changed, 38 insertions(+), 656 deletions(-) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index ddd6a64ec6..6ad20f7457 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -53,6 +53,7 @@ #include "cgroup-util.h" #include "copy.h" #include "dev-setup.h" +#include "dissect-image.h" #include "env-util.h" #include "fd-util.h" #include "fdset.h" @@ -63,6 +64,7 @@ #include "hostname-util.h" #include "id128-util.h" #include "log.h" +#include "loop-util.h" #include "loopback-setup.h" #include "machine-image.h" #include "macro.h" @@ -1769,546 +1771,6 @@ static int setup_propagate(const char *root) { return mount_verbose(LOG_ERR, NULL, q, NULL, MS_SLAVE, NULL); } -static int setup_image(char **device_path, int *loop_nr) { - struct loop_info64 info = { - .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN - }; - _cleanup_close_ int fd = -1, control = -1, loop = -1; - _cleanup_free_ char* loopdev = NULL; - struct stat st; - int r, nr; - - assert(device_path); - assert(loop_nr); - assert(arg_image); - - fd = open(arg_image, O_CLOEXEC|(arg_read_only ? 
O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY); - if (fd < 0) - return log_error_errno(errno, "Failed to open %s: %m", arg_image); - - if (fstat(fd, &st) < 0) - return log_error_errno(errno, "Failed to stat %s: %m", arg_image); - - if (S_ISBLK(st.st_mode)) { - char *p; - - p = strdup(arg_image); - if (!p) - return log_oom(); - - *device_path = p; - - *loop_nr = -1; - - r = fd; - fd = -1; - - return r; - } - - if (!S_ISREG(st.st_mode)) { - log_error("%s is not a regular file or block device.", arg_image); - return -EINVAL; - } - - control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); - if (control < 0) - return log_error_errno(errno, "Failed to open /dev/loop-control: %m"); - - nr = ioctl(control, LOOP_CTL_GET_FREE); - if (nr < 0) - return log_error_errno(errno, "Failed to allocate loop device: %m"); - - if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) - return log_oom(); - - loop = open(loopdev, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY); - if (loop < 0) - return log_error_errno(errno, "Failed to open loop device %s: %m", loopdev); - - if (ioctl(loop, LOOP_SET_FD, fd) < 0) - return log_error_errno(errno, "Failed to set loopback file descriptor on %s: %m", loopdev); - - if (arg_read_only) - info.lo_flags |= LO_FLAGS_READ_ONLY; - - if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) - return log_error_errno(errno, "Failed to set loopback settings on %s: %m", loopdev); - - *device_path = loopdev; - loopdev = NULL; - - *loop_nr = nr; - - r = loop; - loop = -1; - - return r; -} - -#define PARTITION_TABLE_BLURB \ - "Note that the disk image needs to either contain only a single MBR partition of\n" \ - "type 0x83 that is marked bootable, or a single GPT partition of type " \ - "0FC63DAF-8483-4772-8E79-3D69D8477DE4 or follow\n" \ - " http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \ - "to be bootable with systemd-nspawn." 
- -static int dissect_image( - int fd, - char **root_device, bool *root_device_rw, - char **home_device, bool *home_device_rw, - char **srv_device, bool *srv_device_rw, - char **esp_device, - bool *secondary) { - -#ifdef HAVE_BLKID - int home_nr = -1, srv_nr = -1, esp_nr = -1; -#ifdef GPT_ROOT_NATIVE - int root_nr = -1; -#endif -#ifdef GPT_ROOT_SECONDARY - int secondary_root_nr = -1; -#endif - _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *esp = NULL, *generic = NULL; - _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL; - _cleanup_udev_device_unref_ struct udev_device *d = NULL; - _cleanup_blkid_free_probe_ blkid_probe b = NULL; - _cleanup_udev_unref_ struct udev *udev = NULL; - struct udev_list_entry *first, *item; - bool home_rw = true, root_rw = true, secondary_root_rw = true, srv_rw = true, generic_rw = true; - bool is_gpt, is_mbr, multiple_generic = false; - const char *pttype = NULL; - blkid_partlist pl; - struct stat st; - unsigned i; - int r; - - assert(fd >= 0); - assert(root_device); - assert(home_device); - assert(srv_device); - assert(esp_device); - assert(secondary); - assert(arg_image); - - b = blkid_new_probe(); - if (!b) - return log_oom(); - - errno = 0; - r = blkid_probe_set_device(b, fd, 0, 0); - if (r != 0) { - if (errno == 0) - return log_oom(); - - return log_error_errno(errno, "Failed to set device on blkid probe: %m"); - } - - blkid_probe_enable_partitions(b, 1); - blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); - - errno = 0; - r = blkid_do_safeprobe(b); - if (r == -2 || r == 1) { - log_error("Failed to identify any partition table on\n" - " %s\n" - PARTITION_TABLE_BLURB, arg_image); - return -EINVAL; - } else if (r != 0) { - if (errno == 0) - errno = EIO; - return log_error_errno(errno, "Failed to probe: %m"); - } - - (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); - - is_gpt = streq_ptr(pttype, "gpt"); - is_mbr = streq_ptr(pttype, "dos"); - - if (!is_gpt && 
!is_mbr) { - log_error("No GPT or MBR partition table discovered on\n" - " %s\n" - PARTITION_TABLE_BLURB, arg_image); - return -EINVAL; - } - - errno = 0; - pl = blkid_probe_get_partitions(b); - if (!pl) { - if (errno == 0) - return log_oom(); - - log_error("Failed to list partitions of %s", arg_image); - return -errno; - } - - udev = udev_new(); - if (!udev) - return log_oom(); - - if (fstat(fd, &st) < 0) - return log_error_errno(errno, "Failed to stat block device: %m"); - - d = udev_device_new_from_devnum(udev, 'b', st.st_rdev); - if (!d) - return log_oom(); - - for (i = 0;; i++) { - int n, m; - - if (i >= 10) { - log_error("Kernel partitions never appeared."); - return -ENXIO; - } - - e = udev_enumerate_new(udev); - if (!e) - return log_oom(); - - r = udev_enumerate_add_match_parent(e, d); - if (r < 0) - return log_oom(); - - r = udev_enumerate_scan_devices(e); - if (r < 0) - return log_error_errno(r, "Failed to scan for partition devices of %s: %m", arg_image); - - /* Count the partitions enumerated by the kernel */ - n = 0; - first = udev_enumerate_get_list_entry(e); - udev_list_entry_foreach(item, first) - n++; - - /* Count the partitions enumerated by blkid */ - m = blkid_partlist_numof_partitions(pl); - if (n == m + 1) - break; - if (n > m + 1) { - log_error("blkid and kernel partition list do not match."); - return -EIO; - } - if (n < m + 1) { - unsigned j; - - /* The kernel has probed fewer partitions than - * blkid? Maybe the kernel prober is still - * running or it got EBUSY because udev - * already opened the device. Let's reprobe - * the device, which is a synchronous call - * that waits until probing is complete. */ - - for (j = 0; j < 20; j++) { - - r = ioctl(fd, BLKRRPART, 0); - if (r < 0) - r = -errno; - if (r >= 0 || r != -EBUSY) - break; - - /* If something else has the device - * open, such as an udev rule, the - * ioctl will return EBUSY. 
Since - * there's no way to wait until it - * isn't busy anymore, let's just wait - * a bit, and try again. - * - * This is really something they - * should fix in the kernel! */ - - usleep(50 * USEC_PER_MSEC); - } - - if (r < 0) - return log_error_errno(r, "Failed to reread partition table: %m"); - } - - e = udev_enumerate_unref(e); - } - - first = udev_enumerate_get_list_entry(e); - udev_list_entry_foreach(item, first) { - _cleanup_udev_device_unref_ struct udev_device *q; - const char *node; - unsigned long long flags; - blkid_partition pp; - dev_t qn; - int nr; - - errno = 0; - q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item)); - if (!q) { - if (!errno) - errno = ENOMEM; - - return log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image); - } - - qn = udev_device_get_devnum(q); - if (major(qn) == 0) - continue; - - if (st.st_rdev == qn) - continue; - - node = udev_device_get_devnode(q); - if (!node) - continue; - - pp = blkid_partlist_devno_to_partition(pl, qn); - if (!pp) - continue; - - flags = blkid_partition_get_flags(pp); - - nr = blkid_partition_get_partno(pp); - if (nr < 0) - continue; - - if (is_gpt) { - sd_id128_t type_id; - const char *stype; - - if (flags & GPT_FLAG_NO_AUTO) - continue; - - stype = blkid_partition_get_type_string(pp); - if (!stype) - continue; - - if (sd_id128_from_string(stype, &type_id) < 0) - continue; - - if (sd_id128_equal(type_id, GPT_HOME)) { - - if (home && nr >= home_nr) - continue; - - home_nr = nr; - home_rw = !(flags & GPT_FLAG_READ_ONLY); - - r = free_and_strdup(&home, node); - if (r < 0) - return log_oom(); - - } else if (sd_id128_equal(type_id, GPT_SRV)) { - - if (srv && nr >= srv_nr) - continue; - - srv_nr = nr; - srv_rw = !(flags & GPT_FLAG_READ_ONLY); - - r = free_and_strdup(&srv, node); - if (r < 0) - return log_oom(); - } else if (sd_id128_equal(type_id, GPT_ESP)) { - - if (esp && nr >= esp_nr) - continue; - - esp_nr = nr; - - r = free_and_strdup(&esp, node); - if (r < 0) 
- return log_oom(); - } -#ifdef GPT_ROOT_NATIVE - else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { - - if (root && nr >= root_nr) - continue; - - root_nr = nr; - root_rw = !(flags & GPT_FLAG_READ_ONLY); - - r = free_and_strdup(&root, node); - if (r < 0) - return log_oom(); - } -#endif -#ifdef GPT_ROOT_SECONDARY - else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { - - if (secondary_root && nr >= secondary_root_nr) - continue; - - secondary_root_nr = nr; - secondary_root_rw = !(flags & GPT_FLAG_READ_ONLY); - - r = free_and_strdup(&secondary_root, node); - if (r < 0) - return log_oom(); - } -#endif - else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) { - - if (generic) - multiple_generic = true; - else { - generic_rw = !(flags & GPT_FLAG_READ_ONLY); - - r = free_and_strdup(&generic, node); - if (r < 0) - return log_oom(); - } - } - - } else if (is_mbr) { - int type; - - if (flags != 0x80) /* Bootable flag */ - continue; - - type = blkid_partition_get_type(pp); - if (type != 0x83) /* Linux partition */ - continue; - - if (generic) - multiple_generic = true; - else { - generic_rw = true; - - r = free_and_strdup(&root, node); - if (r < 0) - return log_oom(); - } - } - } - - if (root) { - *root_device = root; - root = NULL; - - *root_device_rw = root_rw; - *secondary = false; - } else if (secondary_root) { - *root_device = secondary_root; - secondary_root = NULL; - - *root_device_rw = secondary_root_rw; - *secondary = true; - } else if (generic) { - - /* There were no partitions with precise meanings - * around, but we found generic partitions. In this - * case, if there's only one, we can go ahead and boot - * it, otherwise we bail out, because we really cannot - * make any sense of it. 
*/ - - if (multiple_generic) { - log_error("Identified multiple bootable Linux partitions on\n" - " %s\n" - PARTITION_TABLE_BLURB, arg_image); - return -EINVAL; - } - - *root_device = generic; - generic = NULL; - - *root_device_rw = generic_rw; - *secondary = false; - } else { - log_error("Failed to identify root partition in disk image\n" - " %s\n" - PARTITION_TABLE_BLURB, arg_image); - return -EINVAL; - } - - if (home) { - *home_device = home; - home = NULL; - - *home_device_rw = home_rw; - } - - if (srv) { - *srv_device = srv; - srv = NULL; - - *srv_device_rw = srv_rw; - } - - if (esp) { - *esp_device = esp; - esp = NULL; - } - - return 0; -#else - log_error("--image= is not supported, compiled without blkid support."); - return -EOPNOTSUPP; -#endif -} - -static int mount_device(const char *what, const char *where, const char *directory, bool rw) { -#ifdef HAVE_BLKID - _cleanup_blkid_free_probe_ blkid_probe b = NULL; - const char *fstype, *p, *options; - int r; - - assert(what); - assert(where); - - if (arg_read_only) - rw = false; - - if (directory) - p = strjoina(where, directory); - else - p = where; - - errno = 0; - b = blkid_new_probe_from_filename(what); - if (!b) { - if (errno == 0) - return log_oom(); - return log_error_errno(errno, "Failed to allocate prober for %s: %m", what); - } - - blkid_probe_enable_superblocks(b, 1); - blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); - - errno = 0; - r = blkid_do_safeprobe(b); - if (r == -1 || r == 1) { - log_error("Cannot determine file system type of %s", what); - return -EINVAL; - } else if (r != 0) { - if (errno == 0) - errno = EIO; - return log_error_errno(errno, "Failed to probe %s: %m", what); - } - - errno = 0; - if (blkid_probe_lookup_value(b, "TYPE", &fstype, NULL) < 0) { - if (errno == 0) - errno = EINVAL; - log_error("Failed to determine file system type of %s", what); - return -errno; - } - - if (streq(fstype, "crypto_LUKS")) { - log_error("nspawn currently does not support LUKS disk 
images."); - return -EOPNOTSUPP; - } - - /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains - * sparse when possible. */ - if (STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs")) { - const char *l; - - l = path_startswith(what, "/dev"); - if (l && startswith(l, "loop")) - options = "discard"; - } - - return mount_verbose(LOG_ERR, what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options); -#else - log_error("--image= is not supported, compiled without blkid support."); - return -EOPNOTSUPP; -#endif -} - static int setup_machine_id(const char *directory) { const char *etc_machine_id; sd_id128_t id; @@ -2368,83 +1830,6 @@ static int recursive_chown(const char *directory, uid_t shift, uid_t range) { return r; } -static int mount_devices( - const char *where, - const char *root_device, bool root_device_rw, - const char *home_device, bool home_device_rw, - const char *srv_device, bool srv_device_rw, - const char *esp_device) { - int r; - - assert(where); - - if (root_device) { - r = mount_device(root_device, arg_directory, NULL, root_device_rw); - if (r < 0) - return log_error_errno(r, "Failed to mount root directory: %m"); - } - - if (home_device) { - r = mount_device(home_device, arg_directory, "/home", home_device_rw); - if (r < 0) - return log_error_errno(r, "Failed to mount home directory: %m"); - } - - if (srv_device) { - r = mount_device(srv_device, arg_directory, "/srv", srv_device_rw); - if (r < 0) - return log_error_errno(r, "Failed to mount server data directory: %m"); - } - - if (esp_device) { - const char *mp, *x; - - /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. 
*/ - - mp = "/efi"; - x = strjoina(arg_directory, mp); - r = dir_is_empty(x); - if (r == -ENOENT) { - mp = "/boot"; - x = strjoina(arg_directory, mp); - r = dir_is_empty(x); - } - - if (r > 0) { - r = mount_device(esp_device, arg_directory, mp, true); - if (r < 0) - return log_error_errno(r, "Failed to mount ESP: %m"); - } - } - - return 0; -} - -static void loop_remove(int nr, int *image_fd) { - _cleanup_close_ int control = -1; - int r; - - if (nr < 0) - return; - - if (image_fd && *image_fd >= 0) { - r = ioctl(*image_fd, LOOP_CLR_FD); - if (r < 0) - log_debug_errno(errno, "Failed to close loop image: %m"); - *image_fd = safe_close(*image_fd); - } - - control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); - if (control < 0) { - log_warning_errno(errno, "Failed to open /dev/loop-control: %m"); - return; - } - - r = ioctl(control, LOOP_CTL_REMOVE, nr); - if (r < 0) - log_debug_errno(errno, "Failed to remove loop %d: %m", nr); -} - /* * Return values: * < 0 : wait_for_terminate() failed to get the state of the @@ -2919,10 +2304,7 @@ static int outer_child( Barrier *barrier, const char *directory, const char *console, - const char *root_device, bool root_device_rw, - const char *home_device, bool home_device_rw, - const char *srv_device, bool srv_device_rw, - const char *esp_device, + DissectedImage *dissected_image, bool interactive, bool secondary, int pid_socket, @@ -2982,13 +2364,11 @@ static int outer_child( if (r < 0) return r; - r = mount_devices(directory, - root_device, root_device_rw, - home_device, home_device_rw, - srv_device, srv_device_rw, - esp_device); - if (r < 0) - return r; + if (dissected_image) { + r = dissected_image_mount(dissected_image, directory, DISSECTED_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? 
DISSECTED_IMAGE_READ_ONLY : 0)); + if (r < 0) + return r; + } r = determine_uid_shift(directory); if (r < 0) @@ -3605,10 +2985,7 @@ static int load_settings(void) { static int run(int master, const char* console, - const char *root_device, bool root_device_rw, - const char *home_device, bool home_device_rw, - const char *srv_device, bool srv_device_rw, - const char *esp_device, + DissectedImage *dissected_image, bool interactive, bool secondary, FDSet *fds, @@ -3715,10 +3092,7 @@ static int run(int master, r = outer_child(&barrier, arg_directory, console, - root_device, root_device_rw, - home_device, home_device_rw, - srv_device, srv_device_rw, - esp_device, + dissected_image, interactive, secondary, pid_socket_pair[1], @@ -4025,11 +3399,10 @@ static int run(int master, int main(int argc, char *argv[]) { - _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL, *esp_device = NULL, *console = NULL; - bool root_device_rw = true, home_device_rw = true, srv_device_rw = true; - _cleanup_close_ int master = -1, image_fd = -1; + _cleanup_free_ char *console = NULL; + _cleanup_close_ int master = -1; _cleanup_fdset_free_ FDSet *fds = NULL; - int r, n_fd_passed, loop_nr = -1, ret = EXIT_SUCCESS; + int r, n_fd_passed, ret = EXIT_SUCCESS; char veth_name[IFNAMSIZ] = ""; bool secondary = false, remove_directory = false, remove_image = false; pid_t pid = 0; @@ -4037,6 +3410,8 @@ int main(int argc, char *argv[]) { _cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT; bool interactive, veth_created = false, remove_tmprootdir = false; char tmprootdir[] = "/tmp/nspawn-root-XXXXXX"; + _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; + _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL; log_parse_environment(); log_open(); @@ -4250,20 +3625,32 @@ int main(int argc, char *argv[]) { goto finish; } - image_fd = setup_image(&device_path, &loop_nr); - if (image_fd 
< 0) { - r = image_fd; + r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &loop); + if (r < 0) { + log_error_errno(r, "Failed to set up loopback block device: %m"); goto finish; } - r = dissect_image(image_fd, - &root_device, &root_device_rw, - &home_device, &home_device_rw, - &srv_device, &srv_device_rw, - &esp_device, - &secondary); - if (r < 0) + r = dissect_image(loop->fd, &dissected_image); + if (r == -ENOPKG) { + log_error_errno(r, "Could not find a suitable file system or partition table in image: %s", arg_image); + + log_notice("Note that the disk image needs to\n" + " a) either contain only a single MBR partition of type 0x83 that is marked bootable\n" + " b) or contain a single GPT partition of type 0FC63DAF-8483-4772-8E79-3D69D8477DE4\n" + " c) or follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" + " d) or contain a file system without a partition table\n" + "in order to be bootable with systemd-nspawn."); goto finish; + } + if (r == -EOPNOTSUPP) { + log_error_errno(r, "--image= is not supported, compiled without blkid support."); + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to dissect image: %m"); + goto finish; + } /* Now that we mounted the image, let's try to remove it again, if it is ephemeral */ if (remove_image && unlink(arg_image) >= 0) @@ -4315,10 +3702,7 @@ int main(int argc, char *argv[]) { for (;;) { r = run(master, console, - root_device, root_device_rw, - home_device, home_device_rw, - srv_device, srv_device_rw, - esp_device, + dissected_image, interactive, secondary, fds, veth_name, &veth_created, @@ -4345,8 +3729,6 @@ finish: if (pid > 0) (void) wait_for_terminate(pid, NULL); - loop_remove(loop_nr, &image_fd); - if (remove_directory && arg_directory) { int k; From 676bcb0fc042c24e4335832622ea4104a1295b48 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 1 Dec 2016 23:19:31 +0100 Subject: [PATCH 04/12] util-lib: add easy helpers for temporary 
directories that are rmdir()ed via _cleanup_ This adds mkdtemp_malloc() that is a combination of mkdtemp() plus strdup(). It initializes its return parameter only if the temporary directory could be created successfully, so that the parameter is exactly non-NULL when the directory exists. rmdir_and_free() and rmdir_and_freep() are also added, and the latter may be used inside of _cleanup_ for such a directory string variable, to automatically rmdir() the directory if it is non-NULL when the scope exits. rmdir_and_free() is similar to the existing rm_rf_and_free() however, it only removes a single directory and does not operate recursively. --- src/basic/fileio.c | 19 +++++++++++++++++++ src/basic/fileio.h | 2 ++ src/basic/fs-util.h | 7 +++++++ 3 files changed, 28 insertions(+) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 1615456659..c43b0583a4 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -1409,3 +1409,22 @@ int read_nul_string(FILE *f, char **ret) { return 0; } + +int mkdtemp_malloc(const char *template, char **ret) { + char *p; + + assert(template); + assert(ret); + + p = strdup(template); + if (!p) + return -ENOMEM; + + if (!mkdtemp(p)) { + free(p); + return -errno; + } + + *ret = p; + return 0; +} diff --git a/src/basic/fileio.h b/src/basic/fileio.h index b58c83e64a..17b38a5d60 100644 --- a/src/basic/fileio.h +++ b/src/basic/fileio.h @@ -88,3 +88,5 @@ int open_tmpfile_linkable(const char *target, int flags, char **ret_path); int link_tmpfile(int fd, const char *path, const char *target); int read_nul_string(FILE *f, char **ret); + +int mkdtemp_malloc(const char *template, char **ret); diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 0d925c6b84..5fe5c71ff0 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -84,3 +84,10 @@ enum { }; int chase_symlinks(const char *path_with_prefix, const char *root, unsigned flags, char **ret); + +/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */
+static inline void rmdir_and_free(char *p) { + (void) rmdir(p); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free); From 9153b02bb5030e29d6008992fb74b9028d7c392c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 1 Dec 2016 23:24:20 +0100 Subject: [PATCH 05/12] machined: add API for querying the OS release of a machine image This adds a bus call GetImageOSRelease() to the Manager interface that retrieves the /etc/os-release file of a machine image. It matches the existing GetMachineOSRelease() call, however operates on a disk image rather than a running container. The backend for this call on .raw images is implemented via the generalized image dissector, which makes this scheme relatively easy to implement. --- src/machine/image-dbus.c | 165 ++++++++++++++++++++++ src/machine/image-dbus.h | 1 + src/machine/machine-dbus.c | 40 ++---- src/machine/machine-dbus.h | 2 + src/machine/machinectl.c | 33 +++-- src/machine/machined-dbus.c | 52 +++++++ src/machine/org.freedesktop.machine1.conf | 8 ++ 7 files changed, 261 insertions(+), 40 deletions(-) diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index 867bbc467b..400d8ec7b0 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -17,14 +17,23 @@ along with systemd; If not, see . 
***/ +#include + #include "alloc-util.h" #include "bus-label.h" #include "bus-util.h" +#include "copy.h" +#include "dissect-image.h" #include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" #include "image-dbus.h" #include "io-util.h" +#include "loop-util.h" #include "machine-image.h" +#include "mount-util.h" #include "process-util.h" +#include "raw-clone.h" #include "strv.h" #include "user-util.h" @@ -279,6 +288,161 @@ int bus_image_method_set_limit( return sd_bus_reply_method_return(message, NULL); } +#define EXIT_NOT_FOUND 2 + +static int directory_image_get_os_release(Image *image, char ***ret, sd_bus_error *error) { + + _cleanup_free_ char *path = NULL; + _cleanup_close_ int fd = -1; + int r; + + assert(image); + assert(ret); + + r = chase_symlinks("/etc/os-release", image->path, CHASE_PREFIX_ROOT, &path); + if (r == -ENOENT) + r = chase_symlinks("/usr/lib/os-release", image->path, CHASE_PREFIX_ROOT, &path); + if (r == -ENOENT) + return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Image does not contain OS release information"); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to resolve %s: %m", image->path); + + r = load_env_file_pairs(NULL, path, NULL, ret); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to open %s: %m", path); + + return 0; +} + +static int raw_image_get_os_release(Image *image, char ***ret, sd_bus_error *error) { + _cleanup_(rmdir_and_freep) char *t = NULL; + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + _cleanup_(sigkill_waitp) pid_t child = 0; + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **v = NULL; + siginfo_t si; + int r; + + assert(image); + assert(ret); + + r = mkdtemp_malloc("/tmp/machined-root-XXXXXX", &t); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to create temporary directory: %m"); + + r = loop_device_make_by_path(image->path, 
O_RDONLY, &d); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to set up loop block device for %s: %m", image->path); + + r = dissect_image(d->fd, &m); + if (r == -ENOPKG) + return sd_bus_error_set_errnof(error, r, "Disk image %s not understood: %m", image->path); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to dissect image %s: %m", image->path); + + if (pipe2(pair, O_CLOEXEC) < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to create communication pipe: %m"); + + child = raw_clone(SIGCHLD|CLONE_NEWNS); + if (child < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to fork(): %m"); + + if (child == 0) { + int fd; + + pair[0] = safe_close(pair[0]); + + /* Make sure we never propagate to the host */ + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) + _exit(EXIT_FAILURE); + + r = dissected_image_mount(m, t, DISSECTED_IMAGE_READ_ONLY); + if (r < 0) + _exit(EXIT_FAILURE); + + r = mount_move_root(t); + if (r < 0) + _exit(EXIT_FAILURE); + + fd = open("/etc/os-release", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0 && errno == ENOENT) { + fd = open("/usr/lib/os-release", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0 && errno == ENOENT) + _exit(EXIT_NOT_FOUND); + } + if (fd < 0) + _exit(EXIT_FAILURE); + + r = copy_bytes(fd, pair[1], (uint64_t) -1, false); + if (r < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + f = fdopen(pair[0], "re"); + if (!f) + return -errno; + + pair[0] = -1; + + r = load_env_file_pairs(f, "os-release", NULL, &v); + if (r < 0) + return r; + + r = wait_for_terminate(child, &si); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m"); + child = 0; + if (si.si_code == CLD_EXITED && si.si_status == EXIT_NOT_FOUND) + return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Image does not contain OS release information"); + if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) + return sd_bus_error_setf(error, 
SD_BUS_ERROR_FAILED, "Child died abnormally."); + + *ret = v; + v = NULL; + + return 0; +} + +int bus_image_method_get_os_release( + sd_bus_message *message, + void *userdata, + sd_bus_error *error) { + + _cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT; + _cleanup_strv_free_ char **v = NULL; + Image *image = userdata; + int r; + + r = image_path_lock(image->path, LOCK_SH|LOCK_NB, &tree_global_lock, &tree_local_lock); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to lock image: %m"); + + switch (image->type) { + + case IMAGE_DIRECTORY: + case IMAGE_SUBVOLUME: + r = directory_image_get_os_release(image, &v, error); + break; + + case IMAGE_RAW: + r = raw_image_get_os_release(image, &v, error); + break; + + default: + assert_not_reached("Unknown image type"); + } + if (r < 0) + return r; + + return bus_reply_pair_array(message, v); +} + const sd_bus_vtable image_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0), @@ -296,6 +460,7 @@ const sd_bus_vtable image_vtable[] = { SD_BUS_METHOD("Clone", "sb", NULL, bus_image_method_clone, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("MarkReadOnly", "b", NULL, bus_image_method_mark_read_only, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetLimit", "t", NULL, bus_image_method_set_limit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_image_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_VTABLE_END }; diff --git a/src/machine/image-dbus.h b/src/machine/image-dbus.h index b62da996c6..bc8a6c3400 100644 --- a/src/machine/image-dbus.h +++ b/src/machine/image-dbus.h @@ -33,3 +33,4 @@ int bus_image_method_rename(sd_bus_message *message, void *userdata, sd_bus_erro int bus_image_method_clone(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_image_method_mark_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error); int 
bus_image_method_set_limit(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_image_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error); diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index 28e4867cb3..af745b6567 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -356,11 +356,11 @@ int bus_machine_method_get_addresses(sd_bus_message *message, void *userdata, sd return sd_bus_send(NULL, reply, NULL); } +#define EXIT_NOT_FOUND 2 + int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) { - _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_strv_free_ char **l = NULL; Machine *m = userdata; - char **k, **v; int r; assert(message); @@ -394,7 +394,7 @@ int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, s return sd_bus_error_set_errnof(error, errno, "Failed to fork(): %m"); if (child == 0) { - _cleanup_close_ int fd = -1; + int fd = -1; pair[0] = safe_close(pair[0]); @@ -402,12 +402,14 @@ int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, s if (r < 0) _exit(EXIT_FAILURE); - fd = open("/etc/os-release", O_RDONLY|O_CLOEXEC); - if (fd < 0) { - fd = open("/usr/lib/os-release", O_RDONLY|O_CLOEXEC); - if (fd < 0) - _exit(EXIT_FAILURE); + fd = open("/etc/os-release", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0 && errno == ENOENT) { + fd = open("/usr/lib/os-release", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0 && errno == ENOENT) + _exit(EXIT_NOT_FOUND); } + if (fd < 0) + _exit(EXIT_FAILURE); r = copy_bytes(fd, pair[1], (uint64_t) -1, false); if (r < 0) @@ -431,6 +433,8 @@ int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, s r = wait_for_terminate(child, &si); if (r < 0) return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m"); + if (si.si_code == CLD_EXITED && si.si_status == EXIT_NOT_FOUND) + return 
sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Machine does not contain OS release information"); if (si.si_code != CLD_EXITED || si.si_status != EXIT_SUCCESS) return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally."); @@ -441,25 +445,7 @@ int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, s return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Requesting OS release data is only supported on container machines."); } - r = sd_bus_message_new_method_return(message, &reply); - if (r < 0) - return r; - - r = sd_bus_message_open_container(reply, 'a', "{ss}"); - if (r < 0) - return r; - - STRV_FOREACH_PAIR(k, v, l) { - r = sd_bus_message_append(reply, "{ss}", *k, *v); - if (r < 0) - return r; - } - - r = sd_bus_message_close_container(reply); - if (r < 0) - return r; - - return sd_bus_send(NULL, reply, NULL); + return bus_reply_pair_array(message, l); } int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_error *error) { diff --git a/src/machine/machine-dbus.h b/src/machine/machine-dbus.h index 241b23c7ec..c513783480 100644 --- a/src/machine/machine-dbus.h +++ b/src/machine/machine-dbus.h @@ -42,3 +42,5 @@ int bus_machine_method_open_root_directory(sd_bus_message *message, void *userda int machine_send_signal(Machine *m, bool new_machine); int machine_send_create_reply(Machine *m, sd_bus_error *error); + +int bus_reply_pair_array(sd_bus_message *m, char **l); diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 9c754b4327..3294ea7821 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -138,7 +138,7 @@ static void clean_machine_info(MachineInfo *machines, size_t n_machines) { free(machines); } -static int get_os_release_property(sd_bus *bus, const char *name, const char *query, ...) { +static int call_get_os_release(sd_bus *bus, const char *method, const char *name, const char *query, ...) 
{ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; const char *k, *v, *iter, **query_res = NULL; size_t count = 0, awaited_args = 0; @@ -153,12 +153,13 @@ static int get_os_release_property(sd_bus *bus, const char *name, const char *qu awaited_args++; query_res = newa0(const char *, awaited_args); - r = sd_bus_call_method(bus, - "org.freedesktop.machine1", - "/org/freedesktop/machine1", - "org.freedesktop.machine1.Manager", - "GetMachineOSRelease", - NULL, &reply, "s", name); + r = sd_bus_call_method( + bus, + "org.freedesktop.machine1", + "/org/freedesktop/machine1", + "org.freedesktop.machine1.Manager", + method, + NULL, &reply, "s", name); if (r < 0) return r; @@ -193,7 +194,7 @@ static int get_os_release_property(sd_bus *bus, const char *name, const char *qu val = strdup(query_res[count]); if (!val) { va_end(ap); - return log_oom(); + return -ENOMEM; } *out = val; } @@ -249,8 +250,12 @@ static int list_machines(int argc, char *argv[], void *userdata) { machines[n_machines].os = NULL; machines[n_machines].version_id = NULL; - r = get_os_release_property(bus, name, - "ID\0" "VERSION_ID\0", + r = call_get_os_release( + bus, + "GetMachineOSRelease", + name, + "ID\0" + "VERSION_ID\0", &machines[n_machines].os, &machines[n_machines].version_id); if (r < 0) @@ -610,7 +615,7 @@ static int print_addresses(sd_bus *bus, const char *name, int ifi, const char *p return 0; } -static int print_os_release(sd_bus *bus, const char *name, const char *prefix) { +static int print_os_release(sd_bus *bus, const char *method, const char *name, const char *prefix) { _cleanup_free_ char *pretty = NULL; int r; @@ -618,7 +623,7 @@ static int print_os_release(sd_bus *bus, const char *name, const char *prefix) { assert(name); assert(prefix); - r = get_os_release_property(bus, name, "PRETTY_NAME\0", &pretty, NULL); + r = call_get_os_release(bus, method, name, "PRETTY_NAME\0", &pretty, NULL); if (r < 0) return r; @@ -729,7 +734,7 @@ static void print_machine_status_info(sd_bus 
*bus, MachineStatusInfo *i) { "\n\t ", ALL_IP_ADDRESSES); - print_os_release(bus, i->name, "\t OS: "); + print_os_release(bus, "GetMachineOSRelease", i->name, "\t OS: "); if (i->unit) { printf("\t Unit: %s\n", i->unit); @@ -927,6 +932,8 @@ static void print_image_status_info(sd_bus *bus, ImageStatusInfo *i) { if (i->path) printf("\t Path: %s\n", i->path); + print_os_release(bus, "GetImageOSRelease", i->name, "\t OS: "); + printf("\t RO: %s%s%s\n", i->read_only ? ansi_highlight_red() : "", i->read_only ? "read-only" : "writable", diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c index 3ee3938ebb..fd9e5b56fc 100644 --- a/src/machine/machined-dbus.c +++ b/src/machine/machined-dbus.c @@ -825,6 +825,30 @@ static int method_mark_image_read_only(sd_bus_message *message, void *userdata, return bus_image_method_mark_read_only(message, i, error); } +static int method_get_image_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_(image_unrefp) Image *i = NULL; + const char *name; + int r; + + assert(message); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + if (!image_name_is_valid(name)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", name); + + r = image_find(name, &i); + if (r < 0) + return r; + if (r == 0) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_IMAGE, "No image '%s' known", name); + + i->userdata = userdata; + return bus_image_method_get_os_release(message, i, error); +} + static int clean_pool_done(Operation *operation, int ret, sd_bus_error *error) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_fclose_ FILE *f = NULL; @@ -1396,6 +1420,7 @@ const sd_bus_vtable manager_vtable[] = { SD_BUS_METHOD("RenameImage", "ss", NULL, method_rename_image, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("CloneImage", "ssb", NULL, method_clone_image, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("MarkImageReadOnly", "sb", NULL, 
method_mark_image_read_only, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetImageOSRelease", "s", "a{ss}", method_get_image_os_release, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetPoolLimit", "t", NULL, method_set_pool_limit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetImageLimit", "st", NULL, method_set_image_limit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("CleanPool", "s", "a(st)", method_clean_pool, SD_BUS_VTABLE_UNPRIVILEGED), @@ -1804,3 +1829,30 @@ int manager_add_machine(Manager *m, const char *name, Machine **_machine) { return 0; } + +int bus_reply_pair_array(sd_bus_message *m, char **l) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + char **k, **v; + int r; + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, l) { + r = sd_bus_message_append(reply, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return sd_bus_send(NULL, reply, NULL); + +} diff --git a/src/machine/org.freedesktop.machine1.conf b/src/machine/org.freedesktop.machine1.conf index 562b9d3cc0..82ebfba50c 100644 --- a/src/machine/org.freedesktop.machine1.conf +++ b/src/machine/org.freedesktop.machine1.conf @@ -116,6 +116,10 @@ send_interface="org.freedesktop.machine1.Manager" send_member="SetImageLimit"/> + + @@ -192,6 +196,10 @@ send_interface="org.freedesktop.machine1.Image" send_member="MarkReadOnly"/> + + From 9f3c7fc1ad0d3d46e14b737e912ae8ca15f4e072 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 2 Dec 2016 17:01:19 +0100 Subject: [PATCH 06/12] util-lib: drop unnecessary NULL check DEFINE_TRIVIAL_CLEANUP_FUNC() already does that check, no need to duplicate it. 
--- src/basic/rm-rf.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h index f693a5bb7c..e13f7003e3 100644 --- a/src/basic/rm-rf.h +++ b/src/basic/rm-rf.h @@ -33,8 +33,6 @@ int rm_rf(const char *path, RemoveFlags flags); /* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */ static inline void rm_rf_physical_and_free(char *p) { - if (!p) - return; (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL); free(p); } From a2ea3b2fc82bdd92cf782de61365113bb6ed9ca7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 2 Dec 2016 19:32:09 +0100 Subject: [PATCH 07/12] dissect: add small "systemd-dissect" tool as wrapper around dissect-image.c This adds a small tool that may be used to look into OS images, and mount them to any place. This is mostly a friendlier version of test-dissect-image.c. I am not sure this should really become a proper command of systemd, hence for now do not install it into bindir, but simply libexecdir. This tool is already pretty useful since you can mount image files with it, honouring the various partitions correctly. I figure this is going to become more interesting if the dissector learns LUKS and verity support.
--- .gitignore | 1 + Makefile.am | 12 +++ src/dissect/Makefile | 1 + src/dissect/dissect.c | 206 +++++++++++++++++++++++++++++++++++++++++ src/shared/loop-util.c | 13 ++- src/shared/loop-util.h | 4 +- 6 files changed, 234 insertions(+), 3 deletions(-) create mode 120000 src/dissect/Makefile create mode 100644 src/dissect/dissect.c diff --git a/.gitignore b/.gitignore index 405b07ef4d..baa975d813 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,7 @@ /systemd-debug-generator /systemd-delta /systemd-detect-virt +/systemd-dissect /systemd-escape /systemd-export /systemd-firstboot diff --git a/Makefile.am b/Makefile.am index c47a07a1f0..c6adf3a65c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -404,6 +404,11 @@ rootlibexec_PROGRAMS = \ systemd-socket-proxyd \ systemd-update-done +if HAVE_BLKID +rootlibexec_PROGRAMS += \ + systemd-dissect +endif + if HAVE_UTMP rootlibexec_PROGRAMS += \ systemd-update-utmp @@ -3073,6 +3078,13 @@ systemd_notify_SOURCES = \ systemd_notify_LDADD = \ libsystemd-shared.la +# ------------------------------------------------------------------------------ +systemd_dissect_SOURCES = \ + src/dissect/dissect.c + +systemd_dissect_LDADD = \ + libsystemd-shared.la + # ------------------------------------------------------------------------------ systemd_path_SOURCES = \ src/path/path.c diff --git a/src/dissect/Makefile b/src/dissect/Makefile new file mode 120000 index 0000000000..d0b0e8e008 --- /dev/null +++ b/src/dissect/Makefile @@ -0,0 +1 @@ +../Makefile \ No newline at end of file diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c new file mode 100644 index 0000000000..93ece05948 --- /dev/null +++ b/src/dissect/dissect.c @@ -0,0 +1,206 @@ +/*** + This file is part of systemd. 
+ + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +#include "architecture.h" +#include "dissect-image.h" +#include "log.h" +#include "loop-util.h" +#include "string-util.h" +#include "util.h" + +static enum { + ACTION_DISSECT, + ACTION_MOUNT, +} arg_action = ACTION_DISSECT; +static const char *arg_image = NULL; +static const char *arg_path = NULL; +static bool arg_read_only = false; + +static void help(void) { + printf("%s [OPTIONS...] IMAGE\n" + "%s [OPTIONS...] 
--mount IMAGE PATH\n" + "Dissect a file system OS image.\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -m --mount Mount the image to the specified directory\n" + " -r --read-only Mount read-only\n", + program_invocation_short_name, + program_invocation_short_name); +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "mount", no_argument, NULL, 'm' }, + { "read-only", no_argument, NULL, 'r' }, + {} + }; + + int c; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hmr", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + help(); + return 0; + + case ARG_VERSION: + return version(); + + case 'm': + arg_action = ACTION_MOUNT; + break; + + case 'r': + arg_read_only = true; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached("Unhandled option"); + } + + } + + switch (arg_action) { + + case ACTION_DISSECT: + if (optind + 1 != argc) { + log_error("Expected a file path as only argument."); + return -EINVAL; + } + + arg_image = argv[optind]; + arg_read_only = true; + break; + + case ACTION_MOUNT: + if (optind + 2 != argc) { + log_error("Expected a file path and mount point path as only arguments."); + return -EINVAL; + } + + arg_image = argv[optind]; + arg_path = argv[optind + 1]; + break; + + default: + assert_not_reached("Unknown action."); + } + + return 1; +} + +int main(int argc, char *argv[]) { + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + int r; + + log_parse_environment(); + log_open(); + + r = parse_argv(argc, argv); + if (r <= 0) + goto finish; + + r = loop_device_make_by_path(arg_image, arg_read_only ? 
O_RDONLY : O_RDWR, &d); + if (r < 0) { + log_error_errno(r, "Failed to set up loopback device: %m"); + goto finish; + } + + r = dissect_image(d->fd, &m); + if (r == -ENOPKG) { + log_error_errno(r, "Couldn't identify a suitable partition table or file system in %s.", arg_image); + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to dissect image: %m"); + goto finish; + } + + switch (arg_action) { + + case ACTION_DISSECT: { + unsigned i; + + for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + + if (!p->found) + continue; + + printf("Found %s '%s' partition", + p->rw ? "writable" : "read-only", + partition_designator_to_string(i)); + + if (p->fstype) + printf(" of type %s", p->fstype); + + if (p->architecture != _ARCHITECTURE_INVALID) + printf(" for %s", architecture_to_string(p->architecture)); + + if (p->partno >= 0) + printf(" on partition #%i", p->partno); + + if (p->node) + printf(" (%s)", p->node); + + putchar('\n'); + } + + break; + } + + case ACTION_MOUNT: + r = dissected_image_mount(m, arg_path, + (arg_read_only ? DISSECTED_IMAGE_READ_ONLY : 0) | + DISSECTED_IMAGE_DISCARD_ON_LOOP); + if (r < 0) { + log_error_errno(r, "Failed to mount image: %m"); + goto finish; + } + + loop_device_relinquish(d); + break; + + default: + assert_not_reached("Unknown action."); + } + +finish: + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c index 8be4dbf938..047e213634 100644 --- a/src/shared/loop-util.c +++ b/src/shared/loop-util.c @@ -129,7 +129,7 @@ LoopDevice* loop_device_unref(LoopDevice *d) { if (d->fd >= 0) { - if (d->nr >= 0) { + if (d->nr >= 0 && !d->relinquished) { if (ioctl(d->fd, LOOP_CLR_FD) < 0) log_debug_errno(errno, "Failed to clear loop device: %m"); @@ -138,7 +138,7 @@ LoopDevice* loop_device_unref(LoopDevice *d) { safe_close(d->fd); } - if (d->nr >= 0) { + if (d->nr >= 0 && !d->relinquished) { _cleanup_close_ int control = -1; control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); @@ -155,3 +155,12 @@ LoopDevice* loop_device_unref(LoopDevice *d) { return NULL; } + +void loop_device_relinquish(LoopDevice *d) { + assert(d); + + /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel + * itself, using the loop device "auto-clear" logic we already turned on when creating the device. 
*/ + + d->relinquished = true; +} diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h index 5c847c4ac3..45fead5f18 100644 --- a/src/shared/loop-util.h +++ b/src/shared/loop-util.h @@ -29,11 +29,13 @@ struct LoopDevice { int fd; int nr; char *node; + bool relinquished; }; int loop_device_make(int fd, int open_flags, LoopDevice **ret); int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret); LoopDevice* loop_device_unref(LoopDevice *d); - DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref); + +void loop_device_relinquish(LoopDevice *d); From cf139e6025d499eb93ff51acb1218662a208ff96 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 5 Dec 2016 16:09:04 +0100 Subject: [PATCH 08/12] minor code beautifications --- src/core/umount.c | 19 ++++++++++++------- src/cryptsetup/cryptsetup.c | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/core/umount.c b/src/core/umount.c index 1e5459ed80..2f4b12bdb9 100644 --- a/src/core/umount.c +++ b/src/core/umount.c @@ -344,24 +344,29 @@ static int delete_loopback(const char *device) { } static int delete_dm(dev_t devnum) { - _cleanup_close_ int fd = -1; - int r; + struct dm_ioctl dm = { - .version = {DM_VERSION_MAJOR, - DM_VERSION_MINOR, - DM_VERSION_PATCHLEVEL}, + .version = { + DM_VERSION_MAJOR, + DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL + }, .data_size = sizeof(dm), .dev = devnum, }; + _cleanup_close_ int fd = -1; + assert(major(devnum) != 0); fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC); if (fd < 0) return -errno; - r = ioctl(fd, DM_DEV_REMOVE, &dm); - return r >= 0 ? 
0 : -errno; + if (ioctl(fd, DM_DEV_REMOVE, &dm) < 0) + return -errno; + + return 0; } static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_error) { diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c index 01e7ee9973..c7fec609df 100644 --- a/src/cryptsetup/cryptsetup.c +++ b/src/cryptsetup/cryptsetup.c @@ -651,7 +651,7 @@ int main(int argc, char *argv[]) { k = crypt_init(&cd, arg_header); } else k = crypt_init(&cd, argv[3]); - if (k) { + if (k != 0) { log_error_errno(k, "crypt_init() failed: %m"); goto finish; } From 18b5886e562a3702ed8923e568a7555d2ab1880a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 5 Dec 2016 16:26:48 +0100 Subject: [PATCH 09/12] dissect: add support for encrypted images This adds support to the image dissector to deal with encrypted images (only LUKS). Given that we now have a neatly isolated image dissector codebase, let's add a new feature to it: support for automatically dealing with encrypted images. This is then exposed in systemd-dissect and nspawn. It's pretty basic: only support for passphrase-based encryption. In order to ensure that "systemd-dissect --mount" results in mount points whose backing LUKS DM devices are cleaned up automatically we use the DM_DEV_REMOVE ioctl() directly on the device (in DM_DEFERRED_REMOVE mode). libcryptsetup at the moment doesn't provide a proper API for this. Thankfully, the ioctl() API is pretty easy to use.
--- Makefile.am | 10 +- src/dissect/dissect.c | 46 +++- src/machine/image-dbus.c | 2 +- src/nspawn/nspawn.c | 9 +- src/shared/dissect-image.c | 457 ++++++++++++++++++++++++++++++++----- src/shared/dissect-image.h | 22 +- 6 files changed, 467 insertions(+), 79 deletions(-) diff --git a/Makefile.am b/Makefile.am index c6adf3a65c..1895e33e05 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1086,7 +1086,8 @@ libshared_la_CFLAGS = \ $(ACL_CFLAGS) \ $(LIBIDN_CFLAGS) \ $(SECCOMP_CFLAGS) \ - $(BLKID_CFLAGS) + $(BLKID_CFLAGS) \ + $(LIBCRYPTSETUP_CFLAGS) libshared_la_LIBADD = \ libsystemd-internal.la \ @@ -1096,7 +1097,8 @@ libshared_la_LIBADD = \ $(ACL_LIBS) \ $(LIBIDN_LIBS) \ $(SECCOMP_LIBS) \ - $(BLKID_LIBS) + $(BLKID_LIBS) \ + $(LIBCRYPTSETUP_LIBS) rootlibexec_LTLIBRARIES += \ libsystemd-shared.la @@ -1119,6 +1121,7 @@ libsystemd_shared_la_CFLAGS = \ $(LIBIDN_CFLAGS) \ $(SECCOMP_CFLAGS) \ $(BLKID_CFLAGS) \ + $(LIBCRYPTSETUP_CFLAGS) \ -fvisibility=default # We can't use libshared_la_LIBADD here because it would @@ -1131,7 +1134,8 @@ libsystemd_shared_la_LIBADD = \ $(ACL_LIBS) \ $(LIBIDN_LIBS) \ $(SECCOMP_LIBS) \ - $(BLKID_LIBS) + $(BLKID_LIBS) \ + $(LIBCRYPTSETUP_LIBS) libsystemd_shared_la_LDFLAGS = \ $(AM_LDFLAGS) \ diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 93ece05948..5e6848acb4 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -34,7 +34,7 @@ static enum { } arg_action = ACTION_DISSECT; static const char *arg_image = NULL; static const char *arg_path = NULL; -static bool arg_read_only = false; +static DissectImageFlags arg_flags = DISSECT_IMAGE_DISCARD_ON_LOOP; static void help(void) { printf("%s [OPTIONS...] 
IMAGE\n" @@ -43,7 +43,8 @@ static void help(void) { " -h --help Show this help\n" " --version Show package version\n" " -m --mount Mount the image to the specified directory\n" - " -r --read-only Mount read-only\n", + " -r --read-only Mount read-only\n" + " --discard=MODE Choose 'discard' mode (disabled, loop, all, crypto)\n", program_invocation_short_name, program_invocation_short_name); } @@ -52,6 +53,7 @@ static int parse_argv(int argc, char *argv[]) { enum { ARG_VERSION = 0x100, + ARG_DISCARD, }; static const struct option options[] = { @@ -59,6 +61,7 @@ static int parse_argv(int argc, char *argv[]) { { "version", no_argument, NULL, ARG_VERSION }, { "mount", no_argument, NULL, 'm' }, { "read-only", no_argument, NULL, 'r' }, + { "discard", required_argument, NULL, ARG_DISCARD }, {} }; @@ -83,7 +86,23 @@ static int parse_argv(int argc, char *argv[]) { break; case 'r': - arg_read_only = true; + arg_flags |= DISSECT_IMAGE_READ_ONLY; + break; + + case ARG_DISCARD: + if (streq(optarg, "disabled")) + arg_flags &= ~(DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_DISCARD|DISSECT_IMAGE_DISCARD_ON_CRYPTO); + else if (streq(optarg, "loop")) + arg_flags = (arg_flags & ~(DISSECT_IMAGE_DISCARD|DISSECT_IMAGE_DISCARD_ON_CRYPTO)) | DISSECT_IMAGE_DISCARD_ON_LOOP; + else if (streq(optarg, "all")) + arg_flags = (arg_flags & ~(DISSECT_IMAGE_DISCARD_ON_CRYPTO)) | DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_DISCARD; + else if (streq(optarg, "crypt")) + arg_flags |= DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_DISCARD | DISSECT_IMAGE_DISCARD_ON_CRYPTO; + else { + log_error("Unknown --discard= parameter: %s", optarg); + return -EINVAL; + } + break; case '?': @@ -104,7 +123,7 @@ static int parse_argv(int argc, char *argv[]) { } arg_image = argv[optind]; - arg_read_only = true; + arg_flags |= DISSECT_IMAGE_READ_ONLY; break; case ACTION_MOUNT: @@ -126,6 +145,7 @@ static int parse_argv(int argc, char *argv[]) { int main(int argc, char *argv[]) { _cleanup_(loop_device_unrefp) LoopDevice *d = 
NULL; + _cleanup_(decrypted_image_unrefp) DecryptedImage *di = NULL; _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; int r; @@ -136,7 +156,7 @@ int main(int argc, char *argv[]) { if (r <= 0) goto finish; - r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &d); + r = loop_device_make_by_path(arg_image, (arg_flags & DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR, &d); if (r < 0) { log_error_errno(r, "Failed to set up loopback device: %m"); goto finish; @@ -186,14 +206,24 @@ int main(int argc, char *argv[]) { } case ACTION_MOUNT: - r = dissected_image_mount(m, arg_path, - (arg_read_only ? DISSECTED_IMAGE_READ_ONLY : 0) | - DISSECTED_IMAGE_DISCARD_ON_LOOP); + r = dissected_image_decrypt_interactively(m, NULL, arg_flags, &di); + if (r < 0) + goto finish; + + r = dissected_image_mount(m, arg_path, arg_flags); if (r < 0) { log_error_errno(r, "Failed to mount image: %m"); goto finish; } + if (di) { + r = decrypted_image_relinquish(di); + if (r < 0) { + log_error_errno(r, "Failed to relinquish DM devices: %m"); + goto finish; + } + } + loop_device_relinquish(d); break; diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index 400d8ec7b0..65953b368f 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -358,7 +358,7 @@ static int raw_image_get_os_release(Image *image, char ***ret, sd_bus_error *err if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) _exit(EXIT_FAILURE); - r = dissected_image_mount(m, t, DISSECTED_IMAGE_READ_ONLY); + r = dissected_image_mount(m, t, DISSECT_IMAGE_READ_ONLY); if (r < 0) _exit(EXIT_FAILURE); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 6ad20f7457..035456f45b 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2365,7 +2365,7 @@ static int outer_child( return r; if (dissected_image) { - r = dissected_image_mount(dissected_image, directory, DISSECTED_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? 
DISSECTED_IMAGE_READ_ONLY : 0)); + r = dissected_image_mount(dissected_image, directory, DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0)); if (r < 0) return r; } @@ -3410,8 +3410,9 @@ int main(int argc, char *argv[]) { _cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT; bool interactive, veth_created = false, remove_tmprootdir = false; char tmprootdir[] = "/tmp/nspawn-root-XXXXXX"; - _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL; + _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; log_parse_environment(); log_open(); @@ -3652,6 +3653,10 @@ int main(int argc, char *argv[]) { goto finish; } + r = dissected_image_decrypt_interactively(dissected_image, NULL, 0, &decrypted_image); + if (r < 0) + goto finish; + /* Now that we mounted the image, let's try to remove it again, if it is ephemeral */ if (remove_image && unlink(arg_image) >= 0) remove_image = false; diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 7b65daa0eb..bc4e45be6e 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -17,19 +17,73 @@ along with systemd; If not, see . 
***/ +#ifdef HAVE_LIBCRYPTSETUP +#include +#endif +#include #include #include "architecture.h" +#include "ask-password-api.h" #include "blkid-util.h" #include "dissect-image.h" +#include "fd-util.h" #include "gpt.h" #include "mount-util.h" #include "path-util.h" #include "stat-util.h" +#include "stdio-util.h" #include "string-table.h" #include "string-util.h" #include "udev-util.h" +static int probe_filesystem(const char *node, char **ret_fstype) { +#ifdef HAVE_BLKID + _cleanup_blkid_free_probe_ blkid_probe b = NULL; + const char *fstype; + int r; + + b = blkid_new_probe_from_filename(node); + if (!b) + return -ENOMEM; + + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == -2 || r == 1) { + log_debug("Failed to identify any partition type on partition %s", node); + goto not_found; + } + if (r != 0) { + if (errno == 0) + return -EIO; + + return -errno; + } + + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + + if (fstype) { + char *t; + + t = strdup(fstype); + if (!t) + return -ENOMEM; + + *ret_fstype = t; + return 1; + } + +not_found: + *ret_fstype = NULL; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + int dissect_image(int fd, DissectedImage **ret) { #ifdef HAVE_BLKID @@ -96,7 +150,7 @@ int dissect_image(int fd, DissectedImage **ret) { return -ENOMEM; (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); - if (streq_ptr(usage, "filesystem")) { + if (STRPTR_IN_SET(usage, "filesystem", "crypto")) { _cleanup_free_ char *t = NULL, *n = NULL; const char *fstype = NULL; @@ -123,6 +177,8 @@ int dissect_image(int fd, DissectedImage **ret) { t = n = NULL; + m->encrypted = streq(fstype, "crypto_LUKS"); + *ret = m; m = NULL; @@ -385,52 +441,24 @@ int dissect_image(int fd, DissectedImage **ret) { return -ENXIO; } + blkid_free_probe(b); + b = NULL; + /* Fill in file system types if we don't know them yet. 
*/ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { - const char *fstype; + DissectedPartition *p = m->partitions + i; - if (!m->partitions[i].found) /* not found? */ + if (!p->found) continue; - if (m->partitions[i].fstype) /* already know the type? */ - continue; - - if (!m->partitions[i].node) /* have no device node for? */ - continue; - - if (b) - blkid_free_probe(b); - - b = blkid_new_probe_from_filename(m->partitions[i].node); - if (!b) - return -ENOMEM; - - blkid_probe_enable_superblocks(b, 1); - blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); - - errno = 0; - r = blkid_do_safeprobe(b); - if (r == -2 || r == 1) { - log_debug("Failed to identify any partition type on partition %i", m->partitions[i].partno); - continue; - } - if (r != 0) { - if (errno == 0) - return -EIO; - - return -errno; + if (!p->fstype && p->node) { + r = probe_filesystem(p->node, &p->fstype); + if (r < 0) + return r; } - (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); - if (fstype) { - char *t; - - t = strdup(fstype); - if (!t) - return -ENOMEM; - - m->partitions[i].fstype = t; - } + if (streq_ptr(p->fstype, "crypto_LUKS")) + m->encrypted = true; } *ret = m; @@ -451,48 +479,79 @@ DissectedImage* dissected_image_unref(DissectedImage *m) { for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { free(m->partitions[i].fstype); free(m->partitions[i].node); + free(m->partitions[i].decrypted_fstype); + free(m->partitions[i].decrypted_node); } free(m); return NULL; } -static int mount_partition(DissectedPartition *m, const char *where, const char *directory, DissectedImageMountFlags flags) { - const char *p, *options = NULL; +static int is_loop_device(const char *path) { + char s[strlen("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen("/../loop/")]; + struct stat st; + + assert(path); + + if (stat(path, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + xsprintf(s, "/sys/dev/block/%u:%u/loop/", major(st.st_rdev), 
minor(st.st_rdev)); + if (access(s, F_OK) < 0) { + if (errno != ENOENT) + return -errno; + + /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */ + xsprintf(s, "/sys/dev/block/%u:%u/../loop/", major(st.st_rdev), minor(st.st_rdev)); + if (access(s, F_OK) < 0) + return errno == ENOENT ? false : -errno; + } + + return true; +} + +static int mount_partition( + DissectedPartition *m, + const char *where, + const char *directory, + DissectImageFlags flags) { + + const char *p, *options = NULL, *node, *fstype; bool rw; assert(m); assert(where); - if (!m->found || !m->node || !m->fstype) + node = m->decrypted_node ?: m->node; + fstype = m->decrypted_fstype ?: m->fstype; + + if (!m->found || !node || !fstype) return 0; - rw = m->rw && !(flags & DISSECTED_IMAGE_READ_ONLY); + /* Stacked encryption? Yuck */ + if (streq_ptr(fstype, "crypto_LUKS")) + return -ELOOP; + + rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY); if (directory) p = strjoina(where, directory); else p = where; - /* Not supported for now. */ - if (streq(m->fstype, "crypto_LUKS")) - return -EOPNOTSUPP; + /* If requested, turn on discard support. */ + if (STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs") && + ((flags & DISSECT_IMAGE_DISCARD) || + ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) + options = "discard"; - /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains - * sparse when possible. */ - if ((flags & DISSECTED_IMAGE_DISCARD_ON_LOOP) && - STR_IN_SET(m->fstype, "btrfs", "ext4", "vfat", "xfs")) { - const char *l; - - l = path_startswith(m->node, "/dev"); - if (l && startswith(l, "loop")) - options = "discard"; - } - - return mount_verbose(LOG_DEBUG, m->node, p, m->fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options); + return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 
0 : MS_RDONLY), options); } -int dissected_image_mount(DissectedImage *m, const char *where, DissectedImageMountFlags flags) { +int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlags flags) { int r; assert(m); @@ -536,6 +595,284 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectedImageMo return 0; } +#ifdef HAVE_LIBCRYPTSETUP +typedef struct DecryptedPartition { + struct crypt_device *device; + char *name; + bool relinquished; +} DecryptedPartition; + +struct DecryptedImage { + DecryptedPartition *decrypted; + size_t n_decrypted; + size_t n_allocated; +}; +#endif + +DecryptedImage* decrypted_image_unref(DecryptedImage* d) { +#ifdef HAVE_LIBCRYPTSETUP + size_t i; + int r; + + if (!d) + return NULL; + + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->device && p->name && !p->relinquished) { + r = crypt_deactivate(p->device, p->name); + if (r < 0) + log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name); + } + + if (p->device) + crypt_free(p->device); + free(p->name); + } + + free(d); +#endif + return NULL; +} + +#ifdef HAVE_LIBCRYPTSETUP +static int decrypt_partition( + DissectedPartition *m, + const char *passphrase, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_free_ char *node = NULL, *name = NULL; + struct crypt_device *cd; + const char *suffix; + int r; + + assert(m); + assert(d); + + if (!m->found || !m->node || !m->fstype) + return 0; + + if (!streq(m->fstype, "crypto_LUKS")) + return 0; + + suffix = strrchr(m->node, '/'); + if (!suffix) + return -EINVAL; + suffix++; + if (isempty(suffix)) + return -EINVAL; + + name = strjoin(suffix, "-decrypted"); + if (!name) + return -ENOMEM; + if (!filename_is_valid(name)) + return -EINVAL; + + node = strjoin(crypt_get_dir(), "/", name); + if (!node) + return -ENOMEM; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + r = crypt_init(&cd, 
m->node); + if (r < 0) + return r; + + r = crypt_load(cd, CRYPT_LUKS1, NULL); + if (r < 0) + goto fail; + + r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase), + ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) | + ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0)); + if (r == -EPERM) { + r = -EKEYREJECTED; + goto fail; + } + if (r < 0) + goto fail; + + d->decrypted[d->n_decrypted].name = name; + name = NULL; + + d->decrypted[d->n_decrypted].device = cd; + d->n_decrypted++; + + m->decrypted_node = node; + node = NULL; + + return 0; + +fail: + crypt_free(cd); + return r; +} +#endif + +int dissected_image_decrypt( + DissectedImage *m, + const char *passphrase, + DissectImageFlags flags, + DecryptedImage **ret) { + + _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL; +#ifdef HAVE_LIBCRYPTSETUP + unsigned i; + int r; +#endif + + assert(m); + + /* Returns: + * + * = 0 → There was nothing to decrypt + * > 0 → Decrypted successfully + * -ENOKEY → There's some to decrypt but no key was supplied + * -EKEYREJECTED → Passed key was not correct + */ + + if (!m->encrypted) { + *ret = NULL; + return 0; + } + +#ifdef HAVE_LIBCRYPTSETUP + if (!passphrase) + return -ENOKEY; + + d = new0(DecryptedImage, 1); + if (!d) + return -ENOMEM; + + for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + + if (!p->found) + continue; + + r = decrypt_partition(p, passphrase, flags, d); + if (r < 0) + return r; + + if (!p->decrypted_fstype && p->decrypted_node) { + r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype); + if (r < 0) + return r; + } + } + + *ret = d; + d = NULL; + + return 1; +#else + return -EOPNOTSUPP; +#endif +} + +int dissected_image_decrypt_interactively( + DissectedImage *m, + const char *passphrase, + DissectImageFlags flags, + DecryptedImage **ret) { + + _cleanup_strv_free_erase_ char **z = NULL; + int n = 3, r; + + if (passphrase) 
+ n--; + + for (;;) { + r = dissected_image_decrypt(m, passphrase, flags, ret); + if (r >= 0) + return r; + if (r == -EKEYREJECTED) + log_error_errno(r, "Incorrect passphrase, try again!"); + else if (r != -ENOKEY) { + log_error_errno(r, "Failed to decrypt image: %m"); + return r; + } + + if (--n < 0) { + log_error("Too many retries."); + return -EKEYREJECTED; + } + + z = strv_free(z); + + r = ask_password_auto("Please enter image passphrase!", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z); + if (r < 0) + return log_error_errno(r, "Failed to query for passphrase: %m"); + + passphrase = z[0]; + } +} + +#ifdef HAVE_LIBCRYPTSETUP +static int deferred_remove(DecryptedPartition *p) { + + struct dm_ioctl dm = { + .version = { + DM_VERSION_MAJOR, + DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL + }, + .data_size = sizeof(dm), + .flags = DM_DEFERRED_REMOVE, + }; + + _cleanup_close_ int fd = -1; + + assert(p); + + /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. 
*/ + + fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC); + if (fd < 0) + return -errno; + + strncpy(dm.name, p->name, sizeof(dm.name)); + + if (ioctl(fd, DM_DEV_REMOVE, &dm)) + return -errno; + + return 0; +} +#endif + +int decrypted_image_relinquish(DecryptedImage *d) { + +#ifdef HAVE_LIBCRYPTSETUP + size_t i; + int r; +#endif + + assert(d); + + /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so + * that we don't clean it up ourselves either anymore */ + +#ifdef HAVE_LIBCRYPTSETUP + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->relinquished) + continue; + + r = deferred_remove(p); + if (r < 0) + return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name); + + p->relinquished = true; + } +#endif + + return 0; +} + static const char *const partition_designator_table[] = { [PARTITION_ROOT] = "root", [PARTITION_ROOT_SECONDARY] = "root-secondary", diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 04b19e8553..69484eb32c 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -25,6 +25,7 @@ typedef struct DissectedImage DissectedImage; typedef struct DissectedPartition DissectedPartition; +typedef struct DecryptedImage DecryptedImage; struct DissectedPartition { bool found:1; @@ -33,6 +34,8 @@ struct DissectedPartition { int architecture; /* Intended architecture: either native, secondary or unset (-1). 
*/ char *fstype; char *node; + char *decrypted_node; + char *decrypted_fstype; }; enum { @@ -46,12 +49,15 @@ enum { _PARTITION_DESIGNATOR_INVALID = -1 }; -typedef enum DissectedImageMountFlags { - DISSECTED_IMAGE_READ_ONLY = 1, - DISSECTED_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on loop device and file system supports it */ -} DissectedImageMountFlags; +typedef enum DissectImageFlags { + DISSECT_IMAGE_READ_ONLY = 1, + DISSECT_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on loop device and file system supports it */ + DISSECT_IMAGE_DISCARD = 4, /* Turn on "discard" if file system supports it, on all block devices */ + DISSECT_IMAGE_DISCARD_ON_CRYPTO = 8, /* Turn on "discard" also on crypto devices */ +} DissectImageFlags; struct DissectedImage { + bool encrypted; DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; }; @@ -60,7 +66,13 @@ int dissect_image(int fd, DissectedImage **ret); DissectedImage* dissected_image_unref(DissectedImage *m); DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref); -int dissected_image_mount(DissectedImage *m, const char *dest, DissectedImageMountFlags flags); +int dissected_image_decrypt(DissectedImage *m, const char *passphrase, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_mount(DissectedImage *m, const char *dest, DissectImageFlags flags); + +DecryptedImage* decrypted_image_unref(DecryptedImage *p); +DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref); +int decrypted_image_relinquish(DecryptedImage *d); const char* partition_designator_to_string(int i) _const_; int partition_designator_from_string(const char *name) _pure_; From 4827ab4854d3107d05b65194ac72729955fb3585 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Dec 2016 17:42:40 +0100 Subject: [PATCH 10/12] nspawn: when generating a machine name from an 
image name, truncate .raw suffix Let's prettify the machine name we generate for image-based containers: let's chop off the .raw suffix before using it as machine name. --- src/nspawn/nspawn.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 035456f45b..9168228f4a 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1951,10 +1951,22 @@ static int determine_names(void) { } if (!arg_machine) { + if (arg_directory && path_equal(arg_directory, "/")) arg_machine = gethostname_malloc(); - else - arg_machine = strdup(basename(arg_image ?: arg_directory)); + else { + if (arg_image) { + char *e; + + arg_machine = strdup(basename(arg_image)); + + /* Truncate suffix if there is one */ + e = endswith(arg_machine, ".raw"); + if (e) + *e = 0; + } else + arg_machine = strdup(basename(arg_directory)); + } if (!arg_machine) return log_oom(); From 4623e8e6ac7c7a36b16ec2dc9ad8507fd820c9fa Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Dec 2016 18:28:13 +0100 Subject: [PATCH 11/12] nspawn/dissect: automatically discover dm-verity verity partitions This adds support for discovering and making use of properly tagged dm-verity data integrity partitions. This extends both systemd-nspawn and systemd-dissect with a new --root-hash= switch that takes the root hash to use for the root partition, and is otherwise fully automatic. Verity partitions are discovered automatically by GPT table type UUIDs, as listed in https://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ (which I updated prior to this change, to include new UUIDs for this purpose). mkosi with https://github.com/systemd/mkosi/pull/39 applied may generate images that carry the necessary integrity data.
With that PR and this commit, the following simple lines suffice to boot up an integrity-protected container image: ``` # mkdir test # cd test # mkosi --verity # systemd-nspawn -i ./image.raw -bn ``` Note that mkosi writes the image file to "image.raw" next to a file "image.roothash" that contains the root hash. systemd-nspawn will look for that file and use it if it exists, in case --root-hash= is not specified explicitly. --- src/dissect/dissect.c | 43 +++++- src/machine/image-dbus.c | 2 +- src/nspawn/nspawn.c | 88 +++++++++++- src/shared/dissect-image.c | 245 ++++++++++++++++++++++++++++++---- src/shared/dissect-image.h | 20 ++- src/shared/gpt.h | 17 ++- src/test/test-dissect-image.c | 2 +- 7 files changed, 378 insertions(+), 39 deletions(-) diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 5e6848acb4..e3c96b7407 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -23,6 +23,7 @@ #include "architecture.h" #include "dissect-image.h" +#include "hexdecoct.h" #include "log.h" #include "loop-util.h" #include "string-util.h" @@ -35,6 +36,8 @@ static enum { static const char *arg_image = NULL; static const char *arg_path = NULL; static DissectImageFlags arg_flags = DISSECT_IMAGE_DISCARD_ON_LOOP; +static void *arg_root_hash = NULL; +static size_t arg_root_hash_size = 0; static void help(void) { printf("%s [OPTIONS...] 
IMAGE\n" @@ -44,7 +47,8 @@ static void help(void) { " --version Show package version\n" " -m --mount Mount the image to the specified directory\n" " -r --read-only Mount read-only\n" - " --discard=MODE Choose 'discard' mode (disabled, loop, all, crypto)\n", + " --discard=MODE Choose 'discard' mode (disabled, loop, all, crypto)\n" + " --root-hash=HASH Specify root hash for verity\n", program_invocation_short_name, program_invocation_short_name); } @@ -54,6 +58,7 @@ static int parse_argv(int argc, char *argv[]) { enum { ARG_VERSION = 0x100, ARG_DISCARD, + ARG_ROOT_HASH, }; static const struct option options[] = { @@ -62,10 +67,11 @@ static int parse_argv(int argc, char *argv[]) { { "mount", no_argument, NULL, 'm' }, { "read-only", no_argument, NULL, 'r' }, { "discard", required_argument, NULL, ARG_DISCARD }, + { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, {} }; - int c; + int c, r; assert(argc >= 0); assert(argv); @@ -105,6 +111,25 @@ static int parse_argv(int argc, char *argv[]) { break; + case ARG_ROOT_HASH: { + void *p; + size_t l; + + r = unhexmem(optarg, strlen(optarg), &p, &l); + if (r < 0) + return log_error_errno(r, "Failed to parse root hash: %s", optarg); + if (l < sizeof(sd_id128_t)) { + log_error("Root hash must be at least 128bit long: %s", optarg); + free(p); + return -EINVAL; + } + + free(arg_root_hash); + arg_root_hash = p; + arg_root_hash_size = l; + break; + } + case '?': return -EINVAL; @@ -162,11 +187,15 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(d->fd, &m); + r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, &m); if (r == -ENOPKG) { log_error_errno(r, "Couldn't identify a suitable partition table or file system in %s.", arg_image); goto finish; } + if (r == -EADDRNOTAVAIL) { + log_error_errno(r, "No root partition for specified root hash found in %s.", arg_image); + goto finish; + } if (r < 0) { log_error_errno(r, "Failed to dissect image: %m"); goto finish; @@ -179,6 +208,7 @@ int main(int argc, 
char *argv[]) { for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { DissectedPartition *p = m->partitions + i; + int k; if (!p->found) continue; @@ -193,6 +223,10 @@ int main(int argc, char *argv[]) { if (p->architecture != _ARCHITECTURE_INVALID) printf(" for %s", architecture_to_string(p->architecture)); + k = PARTITION_VERITY_OF(i); + if (k >= 0) + printf(" %s verity", m->partitions[k].found ? "with" : "without"); + if (p->partno >= 0) printf(" on partition #%i", p->partno); @@ -206,7 +240,7 @@ int main(int argc, char *argv[]) { } case ACTION_MOUNT: - r = dissected_image_decrypt_interactively(m, NULL, arg_flags, &di); + r = dissected_image_decrypt_interactively(m, NULL, arg_root_hash, arg_root_hash_size, arg_flags, &di); if (r < 0) goto finish; @@ -232,5 +266,6 @@ int main(int argc, char *argv[]) { } finish: + free(arg_root_hash); return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index 65953b368f..e2fb882393 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -336,7 +336,7 @@ static int raw_image_get_os_release(Image *image, char ***ret, sd_bus_error *err if (r < 0) return sd_bus_error_set_errnof(error, r, "Failed to set up loop block device for %s: %m", image->path); - r = dissect_image(d->fd, &m); + r = dissect_image(d->fd, NULL, 0, &m); if (r == -ENOPKG) return sd_bus_error_set_errnof(error, r, "Disk image %s not understood: %m", image->path); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 9168228f4a..de05b6c5ef 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -61,6 +61,7 @@ #include "format-util.h" #include "fs-util.h" #include "gpt.h" +#include "hexdecoct.h" #include "hostname-util.h" #include "id128-util.h" #include "log.h" @@ -200,6 +201,8 @@ static bool arg_notify_ready = false; static bool arg_use_cgns = true; static unsigned long arg_clone_ns_flags = CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS; static MountSettingsMask arg_mount_settings = 
MOUNT_APPLY_APIVFS_RO; +static void *arg_root_hash = NULL; +static size_t arg_root_hash_size = 0; static void help(void) { printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" @@ -213,6 +216,7 @@ static void help(void) { " -x --ephemeral Run container with snapshot of root directory, and\n" " remove it after exit\n" " -i --image=PATH File system device or disk image for the container\n" + " --root-hash=HASH Specify verity root hash\n" " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. invoke init)\n" " --chdir=PATH Set working directory in the container\n" @@ -424,6 +428,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_CHDIR, ARG_PRIVATE_USERS_CHOWN, ARG_NOTIFY_READY, + ARG_ROOT_HASH, }; static const struct option options[] = { @@ -473,6 +478,7 @@ static int parse_argv(int argc, char *argv[]) { { "settings", required_argument, NULL, ARG_SETTINGS }, { "chdir", required_argument, NULL, ARG_CHDIR }, { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY }, + { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, {} }; @@ -1016,6 +1022,25 @@ static int parse_argv(int argc, char *argv[]) { arg_settings_mask |= SETTING_NOTIFY_READY; break; + case ARG_ROOT_HASH: { + void *k; + size_t l; + + r = unhexmem(optarg, strlen(optarg), &k, &l); + if (r < 0) + return log_error_errno(r, "Failed to parse root hash: %s", optarg); + if (l < sizeof(sd_id128_t)) { + log_error("Root hash must be at least 128bit long: %s", optarg); + free(k); + return -EINVAL; + } + + free(arg_root_hash); + arg_root_hash = k; + arg_root_hash_size = l; + break; + } + case '?': return -EINVAL; @@ -3409,6 +3434,53 @@ static int run(int master, return 1; /* loop again */ } +static int load_root_hash(const char *image) { + _cleanup_free_ char *text = NULL; + char *fn, *n, *e; + void *k; + size_t l; + int r; + + assert_se(image); + + /* Try to load the root hash from a file next to the image file if it exists. 
*/ + + if (arg_root_hash) + return 0; + + fn = new(char, strlen(image) + strlen(".roothash") + 1); + if (!fn) + return log_oom(); + + n = stpcpy(fn, image); + e = endswith(fn, ".raw"); + if (e) + n = e; + + strcpy(n, ".roothash"); + + r = read_one_line_file(fn, &text); + if (r == -ENOENT) + return 0; + if (r < 0) { + log_warning_errno(r, "Failed to read %s, ignoring: %m", fn); + return 0; + } + + r = unhexmem(text, strlen(text), &k, &l); + if (r < 0) + return log_error_errno(r, "Invalid root hash: %s", text); + if (l < sizeof(sd_id128_t)) { + free(k); + return log_error_errno(r, "Root hash too short: %s", text); + } + + arg_root_hash = k; + arg_root_hash_size = l; + + return 0; +} + int main(int argc, char *argv[]) { _cleanup_free_ char *console = NULL; @@ -3623,6 +3695,10 @@ int main(int argc, char *argv[]) { r = log_error_errno(r, "Failed to create image lock: %m"); goto finish; } + + r = load_root_hash(arg_image); + if (r < 0) + goto finish; } if (!mkdtemp(tmprootdir)) { @@ -3644,7 +3720,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(loop->fd, &dissected_image); + r = dissect_image(loop->fd, arg_root_hash, arg_root_hash_size, &dissected_image); if (r == -ENOPKG) { log_error_errno(r, "Could not find a suitable file system or partition table in image: %s", arg_image); @@ -3656,6 +3732,10 @@ int main(int argc, char *argv[]) { "in order to be bootable with systemd-nspawn."); goto finish; } + if (r == -EADDRNOTAVAIL) { + log_error_errno(r, "No root partition for specified root hash found."); + goto finish; + } if (r == -EOPNOTSUPP) { log_error_errno(r, "--image= is not supported, compiled without blkid support."); goto finish; @@ -3665,7 +3745,10 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissected_image_decrypt_interactively(dissected_image, NULL, 0, &decrypted_image); + if (!arg_root_hash && dissected_image->can_verity) + log_notice("Note: image %s contains verity information, but no root hash specified! 
Proceeding without integrity checking.", arg_image); + + r = dissected_image_decrypt_interactively(dissected_image, NULL, arg_root_hash, arg_root_hash_size, 0, &decrypted_image); if (r < 0) goto finish; @@ -3792,6 +3875,7 @@ finish: strv_free(arg_parameters); custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts); expose_port_free_all(arg_expose_ports); + free(arg_root_hash); return r < 0 ? EXIT_FAILURE : ret; } diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index bc4e45be6e..257af78781 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -84,9 +84,10 @@ not_found: #endif } -int dissect_image(int fd, DissectedImage **ret) { +int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectedImage **ret) { #ifdef HAVE_BLKID + sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL; _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL; bool is_gpt, is_mbr, generic_rw, multiple_generic = false; _cleanup_udev_device_unref_ struct udev_device *d = NULL; @@ -103,10 +104,29 @@ int dissect_image(int fd, DissectedImage **ret) { assert(fd >= 0); assert(ret); + assert(root_hash || root_hash_size == 0); /* Probes a disk image, and returns information about what it found in *ret. * - * Returns -ENOPKG if no suitable partition table or file system could be found. */ + * Returns -ENOPKG if no suitable partition table or file system could be found. + * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */ + + if (root_hash) { + /* If a root hash is supplied, then we use the root partition that has a UUID that match the first + * 128bit of the root hash. And we use the verity partition that has a UUID that match the final + * 128bit. 
*/ + + if (root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + memcpy(&root_uuid, root_hash, sizeof(sd_id128_t)); + memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t)); + + if (sd_id128_is_null(root_uuid)) + return -EINVAL; + if (sd_id128_is_null(verity_uuid)) + return -EINVAL; + } if (fstat(fd, &st) < 0) return -errno; @@ -313,17 +333,22 @@ int dissect_image(int fd, DissectedImage **ret) { if (is_gpt) { int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID; - const char *stype, *fstype = NULL; - sd_id128_t type_id; + const char *stype, *sid, *fstype = NULL; + sd_id128_t type_id, id; bool rw = true; if (flags & GPT_FLAG_NO_AUTO) continue; + sid = blkid_partition_get_uuid(pp); + if (!sid) + continue; + if (sd_id128_from_string(sid, &id) < 0) + continue; + stype = blkid_partition_get_type_string(pp); if (!stype) continue; - if (sd_id128_from_string(stype, &type_id) < 0) continue; @@ -339,17 +364,57 @@ int dissect_image(int fd, DissectedImage **ret) { } #ifdef GPT_ROOT_NATIVE else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) + continue; + designator = PARTITION_ROOT; architecture = native_architecture(); rw = !(flags & GPT_FLAG_READ_ONLY); } +#ifdef GPT_ROOT_NATIVE_VERITY + else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) { + + m->can_verity = true; + + /* Ignore verity unless a root hash is specified */ + if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_VERITY; + fstype = "DM_verity_hash"; + architecture = native_architecture(); + rw = false; + } +#endif #endif #ifdef GPT_ROOT_SECONDARY else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && 
!sd_id128_equal(root_uuid, id)) + continue; + designator = PARTITION_ROOT_SECONDARY; architecture = SECONDARY_ARCHITECTURE; rw = !(flags & GPT_FLAG_READ_ONLY); } +#ifdef GPT_ROOT_SECONDARY_VERITY + else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) { + + m->can_verity = true; + + /* Ignore verity unless root has is specified */ + if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_SECONDARY_VERITY; + fstype = "DM_verity_hash"; + architecture = SECONDARY_ARCHITECTURE; + rw = false; + } +#endif #endif else if (sd_id128_equal(type_id, GPT_SWAP)) { designator = PARTITION_SWAP; @@ -420,10 +485,17 @@ int dissect_image(int fd, DissectedImage **ret) { /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not * either, then check if there's a single generic one, and use that. */ + if (m->partitions[PARTITION_ROOT_VERITY].found) + return -ENXIO; + if (m->partitions[PARTITION_ROOT_SECONDARY].found) { m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY]; zero(m->partitions[PARTITION_ROOT_SECONDARY]); - } else if (generic_node) { + + m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY]; + zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]); + + } else if (generic_node && !root_hash) { if (multiple_generic) return -ENOTUNIQ; @@ -441,6 +513,24 @@ int dissect_image(int fd, DissectedImage **ret) { return -ENXIO; } + assert(m->partitions[PARTITION_ROOT].found); + + if (root_hash) { + if (!m->partitions[PARTITION_ROOT_VERITY].found) + return -EADDRNOTAVAIL; + + /* If we found the primary root with the hash, then we definitely want to suppress any secondary root + * (which would be weird, after all the root hash should only be assigned to one pair of + * partitions... 
*/ + m->partitions[PARTITION_ROOT_SECONDARY].found = false; + m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false; + + /* If we found a verity setup, then the root partition is necessarily read-only. */ + m->partitions[PARTITION_ROOT].rw = false; + + m->verity = true; + } + blkid_free_probe(b); b = NULL; @@ -637,6 +727,40 @@ DecryptedImage* decrypted_image_unref(DecryptedImage* d) { } #ifdef HAVE_LIBCRYPTSETUP + +static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) { + _cleanup_free_ char *name = NULL, *node = NULL; + const char *base; + + assert(original_node); + assert(suffix); + assert(ret_name); + assert(ret_node); + + base = strrchr(original_node, '/'); + if (!base) + return -EINVAL; + base++; + if (isempty(base)) + return -EINVAL; + + name = strjoin(base, suffix); + if (!name) + return -ENOMEM; + if (!filename_is_valid(name)) + return -EINVAL; + + node = strjoin(crypt_get_dir(), "/", name); + if (!node) + return -ENOMEM; + + *ret_name = name; + *ret_node = node; + + name = node = NULL; + return 0; +} + static int decrypt_partition( DissectedPartition *m, const char *passphrase, @@ -645,7 +769,6 @@ static int decrypt_partition( _cleanup_free_ char *node = NULL, *name = NULL; struct crypt_device *cd; - const char *suffix; int r; assert(m); @@ -657,22 +780,9 @@ static int decrypt_partition( if (!streq(m->fstype, "crypto_LUKS")) return 0; - suffix = strrchr(m->node, '/'); - if (!suffix) - return -EINVAL; - suffix++; - if (isempty(suffix)) - return -EINVAL; - - name = strjoin(suffix, "-decrypted"); - if (!name) - return -ENOMEM; - if (!filename_is_valid(name)) - return -EINVAL; - - node = strjoin(crypt_get_dir(), "/", name); - if (!node) - return -ENOMEM; + r = make_dm_name_and_node(m->node, "-decrypted", &name, &node); + if (r < 0) + return r; if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) return -ENOMEM; @@ -706,6 +816,71 @@ static int decrypt_partition( return 0; 
+fail: + crypt_free(cd); + return r; +} + +static int verity_partition( + DissectedPartition *m, + DissectedPartition *v, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_free_ char *node = NULL, *name = NULL; + struct crypt_device *cd; + int r; + + assert(m); + assert(v); + + if (!root_hash) + return 0; + + if (!m->found || !m->node || !m->fstype) + return 0; + if (!v->found || !v->node || !v->fstype) + return 0; + + if (!streq(v->fstype, "DM_verity_hash")) + return 0; + + r = make_dm_name_and_node(m->node, "-verity", &name, &node); + if (r < 0) + return r; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + r = crypt_init(&cd, v->node); + if (r < 0) + return r; + + r = crypt_load(cd, CRYPT_VERITY, NULL); + if (r < 0) + goto fail; + + r = crypt_set_data_device(cd, m->node); + if (r < 0) + goto fail; + + r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY); + if (r < 0) + goto fail; + + d->decrypted[d->n_decrypted].name = name; + name = NULL; + + d->decrypted[d->n_decrypted].device = cd; + d->n_decrypted++; + + m->decrypted_node = node; + node = NULL; + + return 0; + fail: crypt_free(cd); return r; @@ -715,6 +890,8 @@ fail: int dissected_image_decrypt( DissectedImage *m, const char *passphrase, + const void *root_hash, + size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret) { @@ -725,6 +902,7 @@ int dissected_image_decrypt( #endif assert(m); + assert(root_hash || root_hash_size == 0); /* Returns: * @@ -734,13 +912,16 @@ int dissected_image_decrypt( * -EKEYREJECTED → Passed key was not correct */ - if (!m->encrypted) { + if (root_hash && root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + if (!m->encrypted && !m->verity) { *ret = NULL; return 0; } #ifdef HAVE_LIBCRYPTSETUP - if (!passphrase) + if (m->encrypted && !passphrase) return -ENOKEY; d = new0(DecryptedImage, 1); @@ -749,6 +930,7 @@ int 
dissected_image_decrypt( for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { DissectedPartition *p = m->partitions + i; + int k; if (!p->found) continue; @@ -757,6 +939,13 @@ int dissected_image_decrypt( if (r < 0) return r; + k = PARTITION_VERITY_OF(i); + if (k >= 0) { + r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d); + if (r < 0) + return r; + } + if (!p->decrypted_fstype && p->decrypted_node) { r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype); if (r < 0) @@ -776,6 +965,8 @@ int dissected_image_decrypt( int dissected_image_decrypt_interactively( DissectedImage *m, const char *passphrase, + const void *root_hash, + size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret) { @@ -786,7 +977,7 @@ int dissected_image_decrypt_interactively( n--; for (;;) { - r = dissected_image_decrypt(m, passphrase, flags, ret); + r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret); if (r >= 0) return r; if (r == -EKEYREJECTED) @@ -880,6 +1071,8 @@ static const char *const partition_designator_table[] = { [PARTITION_SRV] = "srv", [PARTITION_ESP] = "esp", [PARTITION_SWAP] = "swap", + [PARTITION_ROOT_VERITY] = "root-verity", + [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity", }; DEFINE_STRING_TABLE_LOOKUP(partition_designator, int); diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 69484eb32c..902c8d4a37 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -45,10 +45,20 @@ enum { PARTITION_SRV, PARTITION_ESP, PARTITION_SWAP, + PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */ + PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */ _PARTITION_DESIGNATOR_MAX, _PARTITION_DESIGNATOR_INVALID = -1 }; +static inline int PARTITION_VERITY_OF(int p) { + if (p == PARTITION_ROOT) + return PARTITION_ROOT_VERITY; + if (p == PARTITION_ROOT_SECONDARY) + return PARTITION_ROOT_SECONDARY_VERITY; + 
return _PARTITION_DESIGNATOR_INVALID; +} + typedef enum DissectImageFlags { DISSECT_IMAGE_READ_ONLY = 1, DISSECT_IMAGE_DISCARD_ON_LOOP = 2, /* Turn on "discard" if on loop device and file system supports it */ @@ -57,17 +67,19 @@ typedef enum DissectImageFlags { } DissectImageFlags; struct DissectedImage { - bool encrypted; + bool encrypted:1; + bool verity:1; /* verity available and usable */ + bool can_verity:1; /* verity available, but not necessarily used */ DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; }; -int dissect_image(int fd, DissectedImage **ret); +int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectedImage **ret); DissectedImage* dissected_image_unref(DissectedImage *m); DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref); -int dissected_image_decrypt(DissectedImage *m, const char *passphrase, DissectImageFlags flags, DecryptedImage **ret); -int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret); int dissected_image_mount(DissectedImage *m, const char *dest, DissectImageFlags flags); DecryptedImage* decrypted_image_unref(DecryptedImage *p); diff --git a/src/shared/gpt.h b/src/shared/gpt.h index 55b41bbcd8..13d80d611c 100644 --- a/src/shared/gpt.h +++ b/src/shared/gpt.h @@ -32,28 +32,43 @@ #define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3) #define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae) #define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97) - #define GPT_ESP 
SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b) #define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f) #define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15) #define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8) +/* Verity partitions for the root partitions above (we only define them for the root partitions, because only they are + * are commonly read-only and hence suitable for verity). */ +#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76) +#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5) +#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6) +#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20) +#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71) + + #if defined(__x86_64__) # define GPT_ROOT_NATIVE GPT_ROOT_X86_64 # define GPT_ROOT_SECONDARY GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY #elif defined(__i386__) # define GPT_ROOT_NATIVE GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY #endif #if defined(__ia64__) # define GPT_ROOT_NATIVE GPT_ROOT_IA64 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY #endif #if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN) # define GPT_ROOT_NATIVE GPT_ROOT_ARM_64 # define GPT_ROOT_SECONDARY GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY #elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN) # define GPT_ROOT_NATIVE GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY #endif /* Flags we recognize on the root, swap, home and srv partitions when diff --git a/src/test/test-dissect-image.c b/src/test/test-dissect-image.c index 0363ef8eb6..0512a15e88 
100644 --- a/src/test/test-dissect-image.c +++ b/src/test/test-dissect-image.c @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - r = dissect_image(d->fd, &m); + r = dissect_image(d->fd, NULL, 0, &m); if (r < 0) { log_error_errno(r, "Failed to dissect image: %m"); return EXIT_FAILURE; From 58abb66f4b9b0b3a16fe29211454d9936d35c35d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Dec 2016 18:36:08 +0100 Subject: [PATCH 12/12] man: update the nspawn man page, and document what kind of dissection features we now support --- man/systemd-nspawn.xml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index cd0a90d82f..2bc81ea1aa 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -235,16 +235,33 @@ identified by the partition types defined by the Discoverable Partitions Specification. + + No partition table, and a single file system spanning the whole image. On GPT images, if an EFI System Partition (ESP) is discovered, it is automatically mounted to /efi (or /boot as fallback) in case a directory by this name exists and is empty. + Partitions encrypted with LUKS are automatically decrypted. Also, on GPT images dm-verity data integrity + hash partitions are set up if the root hash for them is specified using the + option. + Any other partitions, such as foreign partitions or swap partitions are not mounted. May not be specified together with , . + + + + Takes a data integrity (dm-verity) root hash specified in hexadecimal. This option enables data + integrity checks using dm-verity, if the used image contains the appropriate integrity data (see above). The + specified hash must match the root hash of integrity data, and is usually at least 256bits (and hence 64 + hexadecimal characters) long (in case of SHA256 for example). 
If this option is not specified, but a file with + the .roothash suffix is found next to the image file, bearing otherwise the same name, the + root hash is read from it and automatically used. + +