Systemd/src/shared/machine-image.c

1275 lines
41 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <stdio.h>
#include <stdlib.h>
2016-02-19 00:34:30 +01:00
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "alloc-util.h"
#include "btrfs-util.h"
#include "chattr-util.h"
#include "copy.h"
#include "dirent-util.h"
#include "dissect-image.h"
#include "env-file.h"
#include "env-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "hostname-util.h"
#include "id128-util.h"
#include "lockfile-util.h"
#include "log.h"
#include "loop-util.h"
#include "machine-image.h"
#include "macro.h"
#include "mkdir.h"
#include "nulstr-util.h"
#include "os-util.h"
#include "path-util.h"
#include "rm-rf.h"
#include "string-table.h"
#include "string-util.h"
#include "strv.h"
#include "time-util.h"
#include "utf8.h"
#include "xattr-util.h"
/* Per-class list of directories that are searched for images. Each entry is a NUL-separated string list
 * that is walked with NULSTR_FOREACH(). Directories are listed in search order: image_find() returns the
 * first match, and image_discover() skips names it has already recorded from an earlier directory. */
static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
[IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
"/run/machines\0" /* and here too */
"/var/lib/machines\0" /* the main place for images */
"/var/lib/container\0" /* legacy */
"/usr/local/lib/machines\0"
"/usr/lib/machines\0",
[IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
"/run/portables\0" /* and here too */
"/var/lib/portables\0" /* the main place for images */
"/usr/local/lib/portables\0"
"/usr/lib/portables\0",
};
/* Destructor for Image objects: frees every string and string list owned by the object and then the
 * object itself. It is handed to DEFINE_TRIVIAL_REF_UNREF_FUNC() as the function that releases an Image.
 *
 * i: the image to destroy, must not be NULL.
 *
 * Returns the result of mfree(i), so that callers can reset their pointer in the same expression. */
static Image *image_free(Image *i) {
        assert(i);

        free(i->name);
        free(i->path);

        free(i->hostname);
        strv_free(i->machine_info);
        strv_free(i->os_release);

        return mfree(i);
}
/* Generate the reference counting helpers for Image, using image_free() as the destructor. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
/* Hash operations for hashmaps keyed by a string with Image objects as values; image_unref() is
 * registered as the value destructor. */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
Image, image_unref);
2018-04-16 21:38:24 +02:00
/* Builds the list of locations where a "<name>.nspawn" settings file for the image may be stored:
 * /etc/systemd/nspawn/, /run/systemd/nspawn/, and the directory the image itself is located in.
 *
 * Returns a newly allocated, NULL-terminated string array, or NULL if memory allocation failed. */
static char **image_settings_path(Image *image) {
        _cleanup_strv_free_ char **paths = NULL;
        const char *settings_name, *dir;
        unsigned n = 0;

        assert(image);

        /* Three entries plus the terminating NULL */
        paths = new0(char*, 4);
        if (!paths)
                return NULL;

        settings_name = strjoina(image->name, ".nspawn");

        FOREACH_STRING(dir, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
                char *candidate;

                candidate = path_join(dir, settings_name);
                if (!candidate)
                        return NULL;

                paths[n++] = candidate;
        }

        paths[n] = file_in_same_dir(image->path, settings_name);
        if (!paths[n])
                return NULL;

        return TAKE_PTR(paths);
}
/* Returns the newly allocated path of the "<name>.roothash" file located in the same directory as the
 * image itself, or NULL if file_in_same_dir() fails. */
static char *image_roothash_path(Image *image) {
        const char *roothash_name;

        assert(image);

        roothash_name = strjoina(image->name, ".roothash");

        return file_in_same_dir(image->path, roothash_name);
}
/* Allocates and initializes a new Image object with a single reference.
 *
 * t:         type of the image, must be a valid ImageType.
 * pretty:    name of the image; copied.
 * path:      directory part of the image path, joined with filename via path_join().
 * filename:  file name part of the image path.
 * read_only: whether the image shall be marked read-only.
 * crtime:    creation timestamp to record.
 * mtime:     modification timestamp to record.
 * ret:       where to store the new object on success.
 *
 * The usage and limit fields are all initialized to (uint64_t) -1.
 *
 * Returns 0 on success, -ENOMEM if an allocation failed. */
static int image_new(
                ImageType t,
                const char *pretty,
                const char *path,
                const char *filename,
                bool read_only,
                usec_t crtime,
                usec_t mtime,
                Image **ret) {

        _cleanup_(image_unrefp) Image *i = NULL;

        assert(t >= 0);
        assert(t < _IMAGE_TYPE_MAX);
        assert(pretty);
        assert(filename);
        assert(ret);

        i = new0(Image, 1);
        if (!i)
                return -ENOMEM;

        i->n_ref = 1;
        i->type = t;
        i->read_only = read_only;
        i->crtime = crtime;
        i->mtime = mtime;
        i->usage = i->usage_exclusive = (uint64_t) -1;
        i->limit = i->limit_exclusive = (uint64_t) -1;

        i->name = strdup(pretty);
        if (!i->name)
                return -ENOMEM;

        i->path = path_join(path, filename);
        if (!i->path)
                return -ENOMEM;

        path_simplify(i->path, false);

        *ret = TAKE_PTR(i);

        return 0;
}
static int extract_pretty(const char *path, const char *suffix, char **ret) {
_cleanup_free_ char *name = NULL;
const char *p;
size_t n;
assert(path);
assert(ret);
p = last_path_component(path);
n = strcspn(p, "/");
name = strndup(p, n);
if (!name)
return -ENOMEM;
if (suffix) {
char *e;
e = endswith(name, suffix);
if (!e)
return -EINVAL;
*e = 0;
}
if (!image_name_is_valid(name))
return -EINVAL;
*ret = TAKE_PTR(name);
return 0;
}
/* Inspects the file system object 'filename' (resolved relative to 'dfd') and, if it is a directory, a
 * regular file whose name ends in ".raw", or a block device, creates a matching Image object for it via
 * image_new().
 *
 * pretty:   image name to use; if NULL the name is derived from 'filename' with extract_pretty().
 * dfd:      directory file descriptor 'filename' is resolved against, or AT_FDCWD.
 * path:     directory part that image_new() joins with 'filename' to form the image path; may only be
 *           NULL if dfd is AT_FDCWD.
 * filename: name of the object to inspect.
 * st:       optional, already acquired stat data for 'filename'; if NULL it is acquired with fstatat().
 * ret:      where to store the new Image. If NULL, 0 is returned as soon as the object type has been
 *           recognized, and no Image is created.
 *
 * Returns 0 on success, -EMEDIUMTYPE if the object is of none of the recognized types, or another
 * negative errno-style error code on failure. */
static int image_make(
const char *pretty,
int dfd,
const char *path,
const char *filename,
const struct stat *st,
Image **ret) {
_cleanup_free_ char *pretty_buffer = NULL, *parent = NULL;
struct stat stbuf;
bool read_only;
int r;
assert(dfd >= 0 || dfd == AT_FDCWD);
assert(path || dfd == AT_FDCWD);
assert(filename);
/* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
* devices into /var/lib/machines/, and treat them normally.
*
* This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
* recognize. */
if (!st) {
if (fstatat(dfd, filename, &stbuf, 0) < 0)
return -errno;
st = &stbuf;
}
/* No directory path was passed: determine one on a best-effort basis. It is only used as a fallback in
 * the debug log messages of the block device branch below. */
if (!path) {
if (dfd == AT_FDCWD)
(void) safe_getcwd(&parent);
else
(void) fd_get_path(dfd, &parent);
}
/* Consider the image read-only if its path starts with /usr, or if a write access check fails with
 * EROFS. */
read_only =
(path && path_startswith(path, "/usr")) ||
(faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
if (S_ISDIR(st->st_mode)) {
_cleanup_close_ int fd = -1;
unsigned file_attr = 0;
/* The caller is only interested in whether an image exists, not in the object */
if (!ret)
return 0;
if (!pretty) {
r = extract_pretty(filename, NULL, &pretty_buffer);
if (r < 0)
return r;
pretty = pretty_buffer;
}
fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
if (fd < 0)
return -errno;
/* btrfs subvolumes have inode 256 */
if (st->st_ino == 256) {
r = btrfs_is_filesystem(fd);
if (r < 0)
return r;
if (r) {
BtrfsSubvolInfo info;
/* It's a btrfs subvolume */
r = btrfs_subvol_get_info_fd(fd, 0, &info);
if (r < 0)
return r;
r = image_new(IMAGE_SUBVOLUME,
pretty,
path,
filename,
info.read_only || read_only,
info.otime,
0,
ret);
if (r < 0)
return r;
/* Quota data is only filled in if btrfs_quota_scan_ongoing() returns 0. Failure to read the
 * quota is not fatal: the values set by image_new() are kept in that case. */
if (btrfs_quota_scan_ongoing(fd) == 0) {
BtrfsQuotaInfo quota;
r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
if (r >= 0) {
(*ret)->usage = quota.referenced;
(*ret)->usage_exclusive = quota.exclusive;
(*ret)->limit = quota.referenced_max;
(*ret)->limit_exclusive = quota.exclusive_max;
}
}
return 0;
}
}
/* If the IMMUTABLE bit is set, we consider the
* directory read-only. Since the ioctl is not
* supported everywhere we ignore failures. */
(void) read_attr_fd(fd, &file_attr);
/* It's just a normal directory. */
r = image_new(IMAGE_DIRECTORY,
pretty,
path,
filename,
read_only || (file_attr & FS_IMMUTABLE_FL),
0,
0,
ret);
if (r < 0)
return r;
return 0;
} else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
usec_t crtime = 0;
/* It's a RAW disk image */
if (!ret)
return 0;
/* Best effort: crtime stays 0 if the creation time cannot be determined */
(void) fd_getcrtime_at(dfd, filename, &crtime, 0);
if (!pretty) {
r = extract_pretty(filename, ".raw", &pretty_buffer);
if (r < 0)
return r;
pretty = pretty_buffer;
}
r = image_new(IMAGE_RAW,
pretty,
path,
filename,
!(st->st_mode & 0222) || read_only,
crtime,
timespec_load(&st->st_mtim),
ret);
if (r < 0)
return r;
/* Usage is the allocated block count times 512, the limit is the apparent file size */
(*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
(*ret)->limit = (*ret)->limit_exclusive = st->st_size;
return 0;
} else if (S_ISBLK(st->st_mode)) {
_cleanup_close_ int block_fd = -1;
uint64_t size = UINT64_MAX;
/* A block device */
if (!ret)
return 0;
if (!pretty) {
r = extract_pretty(filename, NULL, &pretty_buffer);
if (r < 0)
return r;
pretty = pretty_buffer;
}
/* Failure to open the device node is not fatal: the image is still reported, just without the
 * read-only state and size queried from the device. */
block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
if (block_fd < 0)
log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
else {
/* Refresh stat data after opening the node */
if (fstat(block_fd, &stbuf) < 0)
return -errno;
st = &stbuf;
if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
return -ENOTTY;
if (!read_only) {
int state = 0;
if (ioctl(block_fd, BLKROGET, &state) < 0)
log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
else if (state)
read_only = true;
}
if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
block_fd = safe_close(block_fd);
}
r = image_new(IMAGE_BLOCK,
pretty,
path,
filename,
!(st->st_mode & 0222) || read_only,
0,
0,
ret);
if (r < 0)
return r;
/* Only record a size if one was actually acquired and it is non-zero */
if (!IN_SET(size, 0, UINT64_MAX))
(*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
return 0;
}
return -EMEDIUMTYPE;
}
/* Looks for an image of the specified name in the search path of the specified class.
 *
 * class: image class selecting the search path.
 * name:  image name to look for. Invalid names are reported as -ENOENT.
 * ret:   where to store the image found; may be NULL if the caller only wants to know whether the image
 *        exists.
 *
 * In each search directory an entry called "<name>" (directory or block device) is tried first, then a
 * regular file called "<name>.raw". For the IMAGE_MACHINE class the special name ".host" refers to the
 * root directory "/" if no such image is found in the search path.
 *
 * Returns 1 if the image was found in the search path, the (non-negative) result of image_make() for the
 * ".host" fallback, -ENOENT if there is no such image, or another negative errno-style error code. */
int image_find(ImageClass class, const char *name, Image **ret) {
        const char *path;
        int r;

        assert(class >= 0);
        assert(class < _IMAGE_CLASS_MAX);
        assert(name);

        /* There are no images with invalid names */
        if (!image_name_is_valid(name))
                return -ENOENT;

        NULSTR_FOREACH(path, image_search_path[class]) {
                _cleanup_closedir_ DIR *d = NULL;
                struct stat st;

                d = opendir(path);
                if (!d) {
                        if (errno == ENOENT)
                                continue;

                        return -errno;
                }

                /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
                 * symlink block devices into the search path */
                if (fstatat(dirfd(d), name, &st, 0) < 0) {
                        _cleanup_free_ char *raw = NULL;

                        if (errno != ENOENT)
                                return -errno;

                        /* Nothing of that exact name, let's try again with the ".raw" suffix appended */
                        raw = strjoin(name, ".raw");
                        if (!raw)
                                return -ENOMEM;

                        if (fstatat(dirfd(d), raw, &st, 0) < 0) {
                                if (errno == ENOENT)
                                        continue;

                                return -errno;
                        }

                        if (!S_ISREG(st.st_mode))
                                continue;

                        r = image_make(name, dirfd(d), path, raw, &st, ret);
                } else {
                        if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
                                continue;

                        r = image_make(name, dirfd(d), path, name, &st, ret);
                }
                if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
                        continue;
                if (r < 0)
                        return r;

                if (ret)
                        (*ret)->discoverable = true;

                return 1;
        }

        if (class == IMAGE_MACHINE && streq(name, ".host")) {
                r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
                if (r < 0)
                        return r;

                if (ret)
                        (*ret)->discoverable = true;

                return r;
        }

        return -ENOENT;
}
/* Creates an Image object for an explicitly specified path, bypassing the image search path. The root
 * directory "/" is mapped to the special ".host" image; for all other paths the image name is derived
 * from the path by image_make(). Returns whatever image_make() returns. */
int image_from_path(const char *path, Image **ret) {
        bool is_host;

        /* Note that the 'discoverable' field of the returned object is deliberately left unset: we do
         * not check here whether the image is located in the image search path. And even if it is, we
         * cannot know whether the path we used is overridden by another, different image earlier in the
         * search path. */

        is_host = path_equal(path, "/");

        return image_make(is_host ? ".host" : NULL,
                          AT_FDCWD,
                          NULL,
                          is_host ? "/" : path,
                          NULL,
                          ret);
}
/* Resolves an image that is specified either by name or by path: if the string qualifies as a valid
 * image name it is looked up in the search path of the given class via image_find(), otherwise it is
 * treated as a file system path and passed to image_from_path(). */
int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) {
        return image_name_is_valid(name_or_path)
                ? image_find(class, name_or_path, ret)
                : image_from_path(name_or_path, ret);
}
/* Enumerates all images found in the search path of the specified class and adds them to the hashmap
 * 'h', keyed by image name. Names already present in the hashmap are skipped, hence images found in
 * earlier search directories take precedence. For the IMAGE_MACHINE class a ".host" image referring to
 * "/" is added if no image of that name was found.
 *
 * Returns 0 on success, a negative errno-style error code on failure. */
int image_discover(ImageClass class, Hashmap *h) {
        const char *search_dir;
        int r;

        assert(class >= 0);
        assert(class < _IMAGE_CLASS_MAX);
        assert(h);

        NULSTR_FOREACH(search_dir, image_search_path[class]) {
                _cleanup_closedir_ DIR *dir = NULL;
                struct dirent *entry;

                dir = opendir(search_dir);
                if (!dir) {
                        if (errno != ENOENT)
                                return -errno;

                        /* Search directories that do not exist are simply skipped */
                        continue;
                }

                FOREACH_DIRENT_ALL(entry, dir, return -errno) {
                        _cleanup_(image_unrefp) Image *found = NULL;
                        _cleanup_free_ char *stem = NULL;
                        const char *pretty;
                        struct stat st;

                        if (dot_or_dot_dot(entry->d_name))
                                continue;

                        /* Symlinks are followed here on purpose, so that block devices, trees and raw
                         * files may be symlinked into the search path */
                        if (fstatat(dirfd(dir), entry->d_name, &st, 0) < 0) {
                                if (errno != ENOENT)
                                        return -errno;

                                continue;
                        }

                        if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
                                pretty = entry->d_name;
                        else if (S_ISREG(st.st_mode)) {
                                const char *ext;

                                /* Regular files only qualify if they carry the ".raw" suffix, which is
                                 * not part of the image name */
                                ext = endswith(entry->d_name, ".raw");
                                if (!ext)
                                        continue;

                                stem = strndup(entry->d_name, ext - entry->d_name);
                                if (!stem)
                                        return -ENOMEM;

                                pretty = stem;
                        } else
                                continue;

                        if (!image_name_is_valid(pretty))
                                continue;

                        if (hashmap_contains(h, pretty))
                                continue;

                        r = image_make(pretty, dirfd(dir), search_dir, entry->d_name, &st, &found);
                        if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
                                continue;
                        if (r < 0)
                                return r;

                        found->discoverable = true;

                        r = hashmap_put(h, found->name, found);
                        if (r < 0)
                                return r;

                        /* The hashmap holds the reference now */
                        found = NULL;
                }
        }

        if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
                _cleanup_(image_unrefp) Image *host = NULL;

                r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &host);
                if (r < 0)
                        return r;

                host->discoverable = true;

                r = hashmap_put(h, host->name, host);
                if (r < 0)
                        return r;

                host = NULL;
        }

        return 0;
}
/* Removes an image from disk, along with the ".nspawn" settings files and the ".roothash" file
 * associated with it. Vendor images and the host image are refused with -EROFS. The image is locked
 * exclusively (non-blocking) while it is removed.
 *
 * Returns 0 on success, a negative errno-style error code on failure. Failures to remove the auxiliary
 * files are only logged at debug level. */
int image_remove(Image *i) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        _cleanup_strv_free_ char **settings = NULL;
        _cleanup_free_ char *roothash = NULL;
        char **s;
        int r;

        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        settings = image_settings_path(i);
        if (!settings)
                return -ENOMEM;

        roothash = image_roothash_path(i);
        if (!roothash)
                return -ENOMEM;

        /* Take the lock without blocking, so that we fail instead of interfering with a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
                /* Try a plain unlink() first, as the path might just be a symlink. Only if that does
                 * not work remove the subvolume proper. */
                if (unlink(i->path) >= 0)
                        break;

                r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
                if (r < 0)
                        return r;

                break;

        case IMAGE_DIRECTORY:
                /* Drop the immutable flag first, so that read-only directories can be deleted too */
                (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);

                r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
                if (r < 0)
                        return r;

                break;

        case IMAGE_BLOCK:
        case IMAGE_RAW:
                /* Block device nodes inside of /dev are real devices: leave the node alone and only
                 * remove the files stored alongside it. Anywhere else the node is most likely a
                 * symlink, which is unlinked just like a raw image file. */
                if (i->type == IMAGE_BLOCK && path_startswith(i->path, "/dev"))
                        break;

                if (unlink(i->path) < 0)
                        return -errno;

                break;

        default:
                return -EOPNOTSUPP;
        }

        STRV_FOREACH(s, settings) {
                if (unlink(*s) < 0 && errno != ENOENT)
                        log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *s);
        }

        if (unlink(roothash) < 0 && errno != ENOENT)
                log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);

        return 0;
}
static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
_cleanup_free_ char *rs = NULL;
const char *fn;
fn = strjoina(new_name, suffix);
rs = file_in_same_dir(path, fn);
if (!rs)
return -ENOMEM;
return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
}
/* Renames an image, i.e. moves it to a new file name inside the directory it is currently stored in, and
 * renames its ".nspawn" settings files and ".roothash" file accordingly.
 *
 * i:        the image to rename; its 'name' and 'path' fields are updated on success.
 * new_name: the new image name.
 *
 * Returns 0 on success, or a negative errno-style error code:
 *   -EINVAL     if new_name is not a valid image name,
 *   -EROFS      for vendor images, the host image, and block devices located in /dev,
 *   -EEXIST     if image_find() locates an image of the new name,
 *   -EOPNOTSUPP for image types that cannot be renamed,
 *   -ENOMEM     on allocation failure,
 *   or any error returned by the locking and renaming primitives.
 * Failures to rename the auxiliary files are only logged at debug level. */
int image_rename(Image *i, const char *new_name) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
        _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
        _cleanup_strv_free_ char **settings = NULL;
        unsigned file_attr = 0;
        char **j;
        int r;

        assert(i);

        if (!image_name_is_valid(new_name))
                return -EINVAL;

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        settings = image_settings_path(i);
        if (!settings)
                return -ENOMEM;

        roothash = image_roothash_path(i);
        if (!roothash)
                return -ENOMEM;

        /* Make sure we don't interfere with a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        /* Make sure nobody takes the new name, between the time we checked it is currently unused in all
         * search paths, and the time we take possession of it */
        r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
        if (r < 0)
                return r;

        r = image_find(IMAGE_MACHINE, new_name, NULL);
        if (r >= 0)
                return -EEXIST;
        if (r != -ENOENT)
                return r;

        switch (i->type) {

        case IMAGE_DIRECTORY:
                /* Remember whether the immutable bit is set. It is turned off only right before the
                 * actual rename below, so that none of the failure paths in between can leave the image
                 * without it. */
                (void) read_attr_path(i->path, &file_attr);

                _fallthrough_;
        case IMAGE_SUBVOLUME:
                new_path = file_in_same_dir(i->path, new_name);
                break;

        case IMAGE_BLOCK:

                /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
                if (path_startswith(i->path, "/dev"))
                        return -EROFS;

                new_path = file_in_same_dir(i->path, new_name);
                break;

        case IMAGE_RAW: {
                const char *fn;

                fn = strjoina(new_name, ".raw");
                new_path = file_in_same_dir(i->path, fn);
                break;
        }

        default:
                return -EOPNOTSUPP;
        }

        if (!new_path)
                return -ENOMEM;

        nn = strdup(new_name);
        if (!nn)
                return -ENOMEM;

        /* Turn off the immutable bit while we rename the image, so that we can rename it */
        if (file_attr & FS_IMMUTABLE_FL)
                (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);

        r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
        if (r < 0) {
                /* The rename failed, hence the image is still at its old place: put the immutable bit
                 * back there, so that a failed rename does not silently make the image writable. */
                if (file_attr & FS_IMMUTABLE_FL)
                        (void) chattr_path(i->path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);

                return r;
        }

        /* Restore the immutable bit, if it was set before */
        if (file_attr & FS_IMMUTABLE_FL)
                (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);

        free_and_replace(i->path, new_path);
        free_and_replace(i->name, nn);

        /* 'settings' and 'roothash' were calculated before the rename, hence still carry the old name */
        STRV_FOREACH(j, settings) {
                r = rename_auxiliary_file(*j, new_name, ".nspawn");
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
        }

        r = rename_auxiliary_file(roothash, new_name, ".roothash");
        if (r < 0 && r != -ENOENT)
                log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);

        return 0;
}
static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
_cleanup_free_ char *rs = NULL;
const char *fn;
fn = strjoina(new_name, suffix);
rs = file_in_same_dir(path, fn);
if (!rs)
return -ENOMEM;
return copy_file_atomic(path, rs, 0664, 0, 0, COPY_REFLINK);
}
/* Creates a copy of an image under a new name in /var/lib/machines/, and clones its ".nspawn" settings
 * files and ".roothash" file along with it.
 *
 * i:         the image to clone; only directory, subvolume and raw images are supported.
 * new_name:  name of the copy.
 * read_only: whether the copy shall be created read-only.
 *
 * Returns 0 on success, -EINVAL for an invalid name, -EEXIST if image_find() locates an image of the new
 * name, -EOPNOTSUPP for unsupported image types, or another negative errno-style error code. Failures to
 * clone the auxiliary files are only logged at debug level. */
int image_clone(Image *i, const char *new_name, bool read_only) {
        _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
        _cleanup_strv_free_ char **settings = NULL;
        _cleanup_free_ char *roothash = NULL;
        const char *target;
        char **s;
        int r;

        assert(i);

        if (!image_name_is_valid(new_name))
                return -EINVAL;

        settings = image_settings_path(i);
        if (!settings)
                return -ENOMEM;

        roothash = image_roothash_path(i);
        if (!roothash)
                return -ENOMEM;

        /* Lock the new name, so that nobody can grab it between our check that it is still unused and the
         * moment we actually create the copy */
        r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
        if (r < 0)
                return r;

        r = image_find(IMAGE_MACHINE, new_name, NULL);
        if (r >= 0)
                return -EEXIST;
        if (r != -ENOENT)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
        case IMAGE_DIRECTORY:
                /* Whenever possible the copy becomes a btrfs subvolume, even if the source is just a
                 * plain directory. */
                target = strjoina("/var/lib/machines/", new_name);

                r = btrfs_subvol_snapshot(i->path, target,
                                          (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
                                          BTRFS_SNAPSHOT_FALLBACK_COPY |
                                          BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
                                          BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
                                          BTRFS_SNAPSHOT_RECURSIVE |
                                          BTRFS_SNAPSHOT_QUOTA);
                if (r < 0)
                        return r;

                /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
                (void) btrfs_subvol_auto_qgroup(target, 0, true);
                break;

        case IMAGE_RAW:
                target = strjoina("/var/lib/machines/", new_name, ".raw");

                r = copy_file_atomic(i->path, target, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
                if (r < 0)
                        return r;

                break;

        case IMAGE_BLOCK:
        default:
                return -EOPNOTSUPP;
        }

        STRV_FOREACH(s, settings) {
                r = clone_auxiliary_file(*s, new_name, ".nspawn");
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *s);
        }

        r = clone_auxiliary_file(roothash, new_name, ".roothash");
        if (r < 0 && r != -ENOENT)
                log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);

        return 0;
}
/* Marks an image read-only (b == true) or writable (b == false). How this is implemented depends on the
 * image type: the btrfs read-only flag for subvolumes, the immutable file attribute for directories, the
 * write permission bits for raw files, and the BLKROSET ioctl for block devices. Vendor images and the
 * host image are refused with -EROFS.
 *
 * Returns 0 on success, a negative errno-style error code on failure. */
int image_read_only(Image *i, bool b) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        int r;

        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        /* Take the lock without blocking, so that we fail instead of interfering with a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
                /* The flag is set on the top-level subvolume of the image only. */
                r = btrfs_subvol_set_read_only(i->path, b);
                if (r < 0)
                        return r;

                break;

        case IMAGE_DIRECTORY:
                /* The access mode of the top-level directory cannot be used for plain directory trees,
                 * as it affects the container itself. The "immutable" flag can be used instead, to at
                 * least make the top-level directory read-only. That is not as good as a read-only
                 * subvolume, but at least something, and the value can be read back. */
                r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
                if (r < 0)
                        return r;

                break;

        case IMAGE_RAW: {
                struct stat st;
                mode_t mode;

                if (stat(i->path, &st) < 0)
                        return -errno;

                /* Keep only the read bits, and grant the owner write access if writable is requested */
                mode = st.st_mode & 0444;
                if (!b)
                        mode |= 0200;

                if (chmod(i->path, mode) < 0)
                        return -errno;

                /* An image that just became read-only will not be fragmented by writes anymore, hence
                 * this is a good time to defragment it. */
                if (b)
                        (void) btrfs_defrag(i->path);

                break;
        }

        case IMAGE_BLOCK: {
                _cleanup_close_ int block_fd = -1;
                struct stat st;
                int state = b;

                block_fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
                if (block_fd < 0)
                        return -errno;

                if (fstat(block_fd, &st) < 0)
                        return -errno;

                /* Make sure what we opened really is a block device before issuing the ioctl */
                if (!S_ISBLK(st.st_mode))
                        return -ENOTTY;

                if (ioctl(block_fd, BLKROSET, &state) < 0)
                        return -errno;

                break;
        }

        default:
                return -EOPNOTSUPP;
        }

        return 0;
}
/* Takes the locks protecting an image path.
 *
 * path:      absolute path of the image to lock.
 * operation: flock()-style operation; must contain exactly one of LOCK_SH and LOCK_EX.
 * global:    receives the lock taken in /run/systemd/nspawn/locks/, keyed by device or inode number.
 * local:     receives the lock taken next to the image path itself.
 *
 * Both outputs are set to LOCK_FILE_INIT (i.e. no lock is taken) if the SYSTEMD_NSPAWN_LOCK environment
 * variable is set to false, or if a shared lock on "/" is requested.
 *
 * Returns 0 on success, -EINVAL for relative paths or invalid operations, -EBUSY if an exclusive lock on
 * "/" is requested, -ENOTTY if the path refers to something that is neither a block device, a directory
 * nor a regular file, or another negative errno-style error code. */
int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
        _cleanup_free_ char *p = NULL;
        LockFile t = LOCK_FILE_INIT;
        struct stat st;
        bool exclusive;
        int r;

        assert(path);
        assert(global);
        assert(local);

        /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
         * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
         * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
         * correctly. */

        if (!path_is_absolute(path))
                return -EINVAL;

        switch (operation & (LOCK_SH|LOCK_EX)) {
        case LOCK_SH:
                exclusive = false;
                break;
        case LOCK_EX:
                exclusive = true;
                break;
        default:
                return -EINVAL;
        }

        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
                *local = *global = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
         * running off it after all, and we don't want any images to manipulate the host image. We make an
         * exception for shared locks however: we allow those (and make them NOPs since there's no point in
         * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
         * since it means changes made to the host might propagate to the container as they happen (and a
         * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
         * useful not to allow read-only containers off the host root, hence let's support this, and trust
         * the user to do the right thing with this. */
        if (path_equal(path, "/")) {
                if (exclusive)
                        return -EBUSY;

                *local = *global = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        /* If the path cannot be stat()ed, 'p' stays NULL and no global lock is taken below */
        if (stat(path, &st) >= 0) {
                if (S_ISBLK(st.st_mode))
                        r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
                else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
                        r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
                else
                        return -ENOTTY;

                if (r < 0)
                        return -ENOMEM;
        }

        /* For block devices we don't need the "local" lock, as the major/minor lock above should be
         * sufficient, since block devices are host local anyway. */
        if (!path_startswith(path, "/dev/")) {
                r = make_lock_file_for(path, operation, &t);
                if (r < 0) {
                        /* A shared lock that cannot be created because the file system is read-only is
                         * not considered fatal */
                        if (!exclusive && r == -EROFS)
                                log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
                        else
                                return r;
                }
        }

        if (p) {
                (void) mkdir_p("/run/systemd/nspawn/locks", 0700);

                r = make_lock_file(p, operation, global);
                if (r < 0) {
                        /* Don't leave the local lock behind if the global one cannot be taken */
                        release_lock_file(&t);
                        return r;
                }
        } else
                *global = (LockFile) LOCK_FILE_INIT;

        *local = t;
        return 0;
}
/* Sets the disk usage limit of an image. This is supported for btrfs subvolume images only: other image
 * types are refused with -EOPNOTSUPP, vendor images and the host image with -EROFS.
 *
 * Returns the result of btrfs_subvol_set_subtree_quota_limit() otherwise. */
int image_set_limit(Image *i, uint64_t referenced_max) {
        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
                break;

        default:
                return -EOPNOTSUPP;
        }

        /* The quota is applied to the subvolume itself as well as to the subtree. Setting it on the
         * subvolume is mostly there for historical reasons: originally there was no concept of subtree
         * quota, and only the subvolume quota was modified. Failures of these two calls are ignored. */
        (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
        (void) btrfs_subvol_auto_qgroup(i->path, 0, true);

        return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
}
/* Reads the metadata of an image (hostname, machine ID, machine-info and os-release data) and stores it
 * in the Image object, replacing any data stored there before. A shared, non-blocking lock is held on
 * the image while doing so.
 *
 * For directory and subvolume images the files are read directly from the tree, and individual files
 * that are missing or cannot be read are only logged at debug level. For raw and block device images
 * the image is attached to a loop device and dissected, and any failure there is returned.
 *
 * Returns 0 on success (and sets i->metadata_valid), -EOPNOTSUPP for unsupported image types, or another
 * negative errno-style error code. */
int image_read_metadata(Image *i) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        int r;

        assert(i);

        r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
        case IMAGE_DIRECTORY: {
                _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
                sd_id128_t machine_id = SD_ID128_NULL;
                _cleanup_free_ char *hostname = NULL;
                _cleanup_free_ char *path = NULL;

                /* All paths are resolved with the image directory as root (CHASE_PREFIX_ROOT) */
                r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
                else if (r >= 0) {
                        r = read_etc_hostname(path, &hostname);
                        if (r < 0)
                                /* Log the error that was returned, errno is not set by this call */
                                log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
                }

                path = mfree(path);

                r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
                else if (r >= 0) {
                        _cleanup_close_ int fd = -1;

                        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
                        if (fd < 0)
                                log_debug_errno(errno, "Failed to open %s: %m", path);
                        else {
                                r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
                                if (r < 0)
                                        log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
                        }
                }

                path = mfree(path);

                r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
                else if (r >= 0) {
                        r = load_env_file_pairs(NULL, path, &machine_info);
                        if (r < 0)
                                log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
                }

                r = load_os_release_pairs(i->path, &os_release);
                if (r < 0)
                        log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");

                /* Replace whatever was stored before, even by empty values if a file could not be read */
                free_and_replace(i->hostname, hostname);
                i->machine_id = machine_id;
                strv_free_and_replace(i->machine_info, machine_info);
                strv_free_and_replace(i->os_release, os_release);
                break;
        }

        case IMAGE_RAW:
        case IMAGE_BLOCK: {
                _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
                _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;

                r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
                if (r < 0)
                        return r;

                r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_RELAX_VAR_CHECK, &m);
                if (r < 0)
                        return r;

                r = dissected_image_acquire_metadata(m);
                if (r < 0)
                        return r;

                /* Take over the metadata acquired for the dissected image */
                free_and_replace(i->hostname, m->hostname);
                i->machine_id = m->machine_id;
                strv_free_and_replace(i->machine_info, m->machine_info);
                strv_free_and_replace(i->os_release, m->os_release);

                break;
        }

        default:
                return -EOPNOTSUPP;
        }

        i->metadata_valid = true;

        return 0;
}
/* Takes a lock on an image name, by means of a lock file in /run/systemd/nspawn/locks/.
 *
 * name:      the image name to lock.
 * operation: operation passed on to make_lock_file().
 * ret:       receives the lock. It is set to LOCK_FILE_INIT (i.e. no lock is taken) if the
 *            SYSTEMD_NSPAWN_LOCK environment variable is set to false.
 *
 * Returns -EINVAL if the name is not a valid image name, -EBUSY for the special name ".host", and the
 * result of make_lock_file() otherwise. */
int image_name_lock(const char *name, int operation, LockFile *ret) {
        const char *p;

        assert(name);
        assert(ret);

        /* Locks an image name, regardless of the precise path used. */

        if (!image_name_is_valid(name))
                return -EINVAL;

        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
                *ret = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        if (streq(name, ".host"))
                return -EBUSY;

        p = strjoina("/run/systemd/nspawn/locks/name-", name);

        /* Best effort: if the directory cannot be created, make_lock_file() reports the failure */
        (void) mkdir_p("/run/systemd/nspawn/locks", 0700);

        return make_lock_file(p, operation, ret);
}
/* Checks whether a string qualifies as an image name: it must pass filename_is_valid() and
 * utf8_is_valid(), must not be flagged by string_has_cc(), and must not begin with ".#". */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                /* The ".#" prefix is reserved for temporary files used to create new files atomically */
                !startswith(s, ".#");
}
/* Checks whether the path 'image' refers to an entry located directly inside one of the search path
 * directories of the given class, i.e. consists of a search directory followed by exactly one file name
 * component. Trailing slashes are accepted. */
bool image_in_search_path(ImageClass class, const char *image) {
        const char *search_dir;

        assert(image);

        NULSTR_FOREACH(search_dir, image_search_path[class]) {
                const char *rest;
                size_t name_len;

                rest = path_startswith(image, search_dir);
                if (!rest)
                        continue;

                /* A file name has to follow the directory */
                name_len = strcspn(rest, "/");
                if (name_len == 0)
                        continue;

                /* Nothing but slashes may follow the file name */
                rest += name_len;
                rest += strspn(rest, "/");
                if (*rest == 0)
                        return true;
        }

        return false;
}
/* String names of the image types, indexed by ImageType. */
static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
[IMAGE_DIRECTORY] = "directory",
[IMAGE_SUBVOLUME] = "subvolume",
[IMAGE_RAW] = "raw",
[IMAGE_BLOCK] = "block",
};
/* Generates the string lookup functions for ImageType from the table above. */
DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);