Merge pull request #6996 from poettering/discover-block-device

add nspawn image discovery on block devices
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2017-10-30 16:05:20 +01:00 committed by GitHub
commit ecd9ded89b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 167 additions and 31 deletions

4
TODO
View File

@ -35,6 +35,10 @@ Features:
the quota of a the user indicated in User= via unit file settings, like the
other resource management concepts. Would mix nicely with DynamicUser=1
* add dissect_image_warn() as a wrapper around dissect_image() that prints
friendly log messages for the returned errors, so that we don't have to
duplicate that in nspawn, systemd-dissect and PID 1.
* maybe set a new set of env vars for services, based on RuntimeDirectory=,
StateDirectory=, LogsDirectory=, CacheDirectory= and ConfigurationDirectory=
automatically. For example, there could be $RUNTIME_DIRECTORY,

View File

@ -1212,7 +1212,7 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol
if (!S_ISDIR(st.st_mode))
return -EINVAL;
subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
if (subvol_fd < 0)
return -errno;
@ -1292,7 +1292,7 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol
* hence we need to open the
* containing directory first */
child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
if (child_fd < 0)
return -errno;
@ -1641,7 +1641,7 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum
if (!c)
return -ENOMEM;
old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
if (old_child_fd < 0)
return -errno;
@ -1649,7 +1649,7 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum
if (!np)
return -ENOMEM;
new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
if (new_child_fd < 0)
return -errno;
@ -1660,7 +1660,7 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum
* into place. */
if (subvolume_fd < 0) {
subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
if (subvolume_fd < 0)
return -errno;
}

View File

@ -766,12 +766,11 @@ int chase_symlinks(const char *path, const char *original_root, unsigned flags,
return -ENOMEM;
}
}
/* Prefix what's left to do with what we just read, and start the loop again,
* but remain in the current directory. */
joined = strjoin("/", destination, todo);
/* Prefix what's left to do with what we just read, and start the loop again, but
* remain in the current directory. */
joined = strjoin(destination, todo);
} else
joined = strjoin("/", destination, todo);
if (!joined)
return -ENOMEM;

View File

@ -216,6 +216,10 @@ int main(int argc, char *argv[]) {
log_error_errno(r, "No suitable root partition found in image %s.", arg_image);
goto finish;
}
if (r == -EPROTONOSUPPORT) {
log_error_errno(r, "Device %s is loopback block device with partition scanning turned off, please turn it on.", arg_image);
goto finish;
}
if (r < 0) {
log_error_errno(r, "Failed to dissect image: %m");
goto finish;

View File

@ -431,6 +431,7 @@ int bus_image_method_get_os_release(
break;
case IMAGE_RAW:
case IMAGE_BLOCK:
r = raw_image_get_os_release(image, &v, error);
break;

View File

@ -2089,7 +2089,7 @@ static int determine_names(void) {
return -ENOENT;
}
if (i->type == IMAGE_RAW)
if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
r = free_and_strdup(&arg_image, i->path);
else
r = free_and_strdup(&arg_directory, i->path);
@ -3883,6 +3883,10 @@ int main(int argc, char *argv[]) {
log_error_errno(r, "--image= is not supported, compiled without blkid support.");
goto finish;
}
if (r == -EPROTONOSUPPORT) {
log_error_errno(r, "Device is loopback block device with partition scanning turned off, please turn it on.");
goto finish;
}
if (r < 0) {
log_error_errno(r, "Failed to dissect image: %m");
goto finish;

View File

@ -266,18 +266,34 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI
return -EIO;
}
if (n < z + 1) {
unsigned j;
unsigned j = 0;
/* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
* or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
* synchronous call that waits until probing is complete. */
for (j = 0; j < 20; j++) {
for (;;) {
if (j++ > 20)
return -EBUSY;
r = ioctl(fd, BLKRRPART, 0);
if (r < 0)
if (ioctl(fd, BLKRRPART, 0) < 0) {
r = -errno;
if (r >= 0 || r != -EBUSY)
if (r == -EINVAL) {
struct loop_info64 info;
/* If we are running on a loop device that has partition scanning off,
* return an explicit recognizable error about this, so that callers
* can generate a proper message explaining the situation. */
if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
log_debug("Device is loop device and partition scanning is off!");
return -EPROTONOSUPPORT;
}
}
if (r != -EBUSY)
return r;
} else
break;
/* If something else has the device open, such as an udev rule, the ioctl will return
@ -286,11 +302,8 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI
*
* This is really something they should fix in the kernel! */
usleep(50 * USEC_PER_MSEC);
(void) usleep(50 * USEC_PER_MSEC);
}
if (r < 0)
return r;
}
e = udev_enumerate_unref(e);

View File

@ -171,9 +171,8 @@ static int image_make(
assert(filename);
/* We explicitly *do* follow symlinks here, since we want to
* allow symlinking trees into /var/lib/machines/, and treat
* them normally. */
/* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
* devices into /var/lib/machines/, and treat them normally. */
if (fstatat(dfd, filename, &st, 0) < 0)
return -errno;
@ -286,6 +285,58 @@ static int image_make(
(*ret)->limit = (*ret)->limit_exclusive = st.st_size;
return 1;
} else if (S_ISBLK(st.st_mode)) {
_cleanup_close_ int block_fd = -1;
uint64_t size = UINT64_MAX;
/* A block device */
if (!ret)
return 1;
if (!pretty)
pretty = filename;
block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
if (block_fd < 0)
log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
else {
if (fstat(block_fd, &st) < 0)
return -errno;
if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */
return -ENOTTY;
if (!read_only) {
int state = 0;
if (ioctl(block_fd, BLKROGET, &state) < 0)
log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
else if (state)
read_only = true;
}
if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
log_debug_errno(errno, "Failed to issue BLKFLSBUF on device %s/%s, ignoring: %m", path, filename);
block_fd = safe_close(block_fd);
}
r = image_new(IMAGE_BLOCK,
pretty,
path,
filename,
!(st.st_mode & 0222) || read_only,
0,
0,
ret);
if (r < 0)
return r;
if (size != 0 && size != UINT64_MAX)
(*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
return 1;
}
return 0;
@ -432,9 +483,15 @@ int image_remove(Image *i) {
switch (i->type) {
case IMAGE_SUBVOLUME:
r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
if (r < 0)
return r;
/* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
* big guns */
if (unlink(i->path) < 0) {
r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
if (r < 0)
return r;
}
break;
case IMAGE_DIRECTORY:
@ -446,6 +503,17 @@ int image_remove(Image *i) {
break;
case IMAGE_BLOCK:
/* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
* itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
* the thing (it's most likely a symlink after all). */
if (path_startswith(i->path, "/dev"))
break;
/* fallthrough */
case IMAGE_RAW:
if (unlink(i->path) < 0)
return -errno;
@ -536,6 +604,15 @@ int image_rename(Image *i, const char *new_name) {
new_path = file_in_same_dir(i->path, new_name);
break;
case IMAGE_BLOCK:
/* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
if (path_startswith(i->path, "/dev"))
return -EROFS;
new_path = file_in_same_dir(i->path, new_name);
break;
case IMAGE_RAW: {
const char *fn;
@ -659,6 +736,7 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
break;
case IMAGE_BLOCK:
default:
return -EOPNOTSUPP;
}
@ -737,6 +815,26 @@ int image_read_only(Image *i, bool b) {
break;
}
case IMAGE_BLOCK: {
_cleanup_close_ int fd = -1;
struct stat st;
int state = b;
fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
if (fd < 0)
return -errno;
if (fstat(fd, &st) < 0)
return -errno;
if (!S_ISBLK(st.st_mode))
return -ENOTTY;
if (ioctl(fd, BLKROSET, &state) < 0)
return -errno;
break;
}
default:
return -EOPNOTSUPP;
}
@ -772,13 +870,24 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile
return -EBUSY;
if (stat(path, &st) >= 0) {
if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
if (S_ISBLK(st.st_mode))
r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
else
return -ENOTTY;
if (r < 0)
return -ENOMEM;
}
r = make_lock_file_for(path, operation, &t);
if (r < 0)
return r;
/* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
* block devices are device local anyway. */
if (!path_startswith(path, "/dev")) {
r = make_lock_file_for(path, operation, &t);
if (r < 0)
return r;
}
if (p) {
mkdir_p("/run/systemd/nspawn/locks", 0700);
@ -860,6 +969,7 @@ static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
[IMAGE_DIRECTORY] = "directory",
[IMAGE_SUBVOLUME] = "subvolume",
[IMAGE_RAW] = "raw",
[IMAGE_BLOCK] = "block",
};
DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);

View File

@ -33,6 +33,7 @@ typedef enum ImageType {
IMAGE_DIRECTORY,
IMAGE_SUBVOLUME,
IMAGE_RAW,
IMAGE_BLOCK,
_IMAGE_TYPE_MAX,
_IMAGE_TYPE_INVALID = -1
} ImageType;