Merge pull request #15891 from bluca/host_os_release

Container Interface: expose the host's os-release metadata to nspawn and portable guests
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2020-07-08 23:52:13 +02:00 committed by GitHub
commit 55aacd502b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 153 additions and 84 deletions

View File

@ -121,6 +121,16 @@ manager, please consider supporting the following interfaces.
`container_ttys=pts/7 pts/8 pts/14` it will spawn three additional login
gettys on ptys 7, 8, and 14.
4. To allow applications to detect the OS version and other metadata of the host
running the container manager, if this is considered desirable, please parse
the host's `/etc/os-release` and set a `$container_host_<key>=<VALUE>`
environment variable for the ID fields described by the [os-release
interface](https://www.freedesktop.org/software/systemd/man/os-release.html), eg:
`$container_host_id=debian`
`$container_host_build_id=2020-06-15`
`$container_host_variant_id=server`
`$container_host_version_id=10`
## Advanced Integration
1. Consider syncing `/etc/localtime` from the host file system into the

View File

@ -339,6 +339,13 @@
name in order to avoid name clashes. Applications
reading this file must ignore unknown fields. Example:
<literal>DEBIAN_BTS="debbugs://bugs.debian.org/"</literal></para>
<para>Container and sandbox runtime managers may make the host's
identification data available to applications by providing the host's
<filename>/etc/os-release</filename> and
<filename>/usr/lib/os-release</filename> as respectively
<filename>/run/host/etc/os-release</filename> and
<filename>/run/host/usr/lib/os-release</filename>.</para>
</refsect1>
<refsect1>

View File

@ -10,6 +10,7 @@
#include "mkdir.h"
#include "selinux-util.h"
#include "smack-util.h"
#include "user-util.h"
int mkdir_label(const char *path, mode_t mode) {
int r;
@ -50,9 +51,9 @@ int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirF
}
int mkdir_parents_label(const char *path, mode_t mode) {
return mkdir_parents_internal(NULL, path, mode, mkdir_label);
return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
}
int mkdir_p_label(const char *path, mode_t mode) {
return mkdir_p_internal(NULL, path, mode, mkdir_label);
return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
}

View File

@ -93,7 +93,7 @@ int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags f
return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper);
}
int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
const char *p, *e;
int r;
@ -136,34 +136,54 @@ int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mk
if (prefix && path_startswith(prefix, t))
continue;
r = _mkdir(t, mode);
if (r < 0 && r != -EEXIST)
return r;
if (uid == UID_INVALID && gid == UID_INVALID && flags == 0) {
r = _mkdir(t, mode);
if (r < 0 && r != -EEXIST)
return r;
} else {
r = mkdir_safe_internal(t, mode, uid, gid, flags, _mkdir);
if (r < 0 && r != -EEXIST)
return r;
}
}
}
int mkdir_parents(const char *path, mode_t mode) {
return mkdir_parents_internal(NULL, path, mode, mkdir_errno_wrapper);
return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
}
int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
return mkdir_parents_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
}
int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
int r;
/* Like mkdir -p */
assert(_mkdir != mkdir);
r = mkdir_parents_internal(prefix, path, mode, _mkdir);
r = mkdir_parents_internal(prefix, path, mode, uid, gid, flags, _mkdir);
if (r < 0)
return r;
r = _mkdir(path, mode);
if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
return r;
if (uid == UID_INVALID && gid == UID_INVALID && flags == 0) {
r = _mkdir(path, mode);
if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
return r;
} else {
r = mkdir_safe_internal(path, mode, uid, gid, flags, _mkdir);
if (r < 0 && r != -EEXIST)
return r;
}
return 0;
}
int mkdir_p(const char *path, mode_t mode) {
return mkdir_p_internal(NULL, path, mode, mkdir_errno_wrapper);
return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
}
int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
return mkdir_p_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
}

View File

@ -12,15 +12,17 @@ int mkdir_errno_wrapper(const char *pathname, mode_t mode);
int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode);
int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
int mkdir_parents(const char *path, mode_t mode);
int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
int mkdir_p(const char *path, mode_t mode);
int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
/* mandatory access control(MAC) versions */
int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
int mkdir_parents_label(const char *path, mode_t mode);
int mkdir_parents_label(const char *path, mode_t mod);
int mkdir_p_label(const char *path, mode_t mode);
/* internally used */
typedef int (*mkdir_func_t)(const char *pathname, mode_t mode);
int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);

View File

@ -487,59 +487,6 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
}
static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) {
int r;
assert(path);
r = mkdir_errno_wrapper(path, mode);
if (r < 0 && r != -EEXIST)
return r;
if (uid_shift == UID_INVALID)
return 0;
if (lchown(path, uid_shift, uid_shift) < 0)
return -errno;
return 0;
}
static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
const char *p, *e;
int r;
assert(path);
if (prefix && !path_startswith(path, prefix))
return -ENOTDIR;
/* create every parent directory in the path, except the last component */
p = path + strspn(path, "/");
for (;;) {
char t[strlen(path) + 1];
e = p + strcspn(p, "/");
p = e + strspn(e, "/");
/* Is this the last component? If so, then we're done */
if (*p == 0)
break;
memcpy(t, path, e - path);
t[e-path] = 0;
if (prefix && path_startswith(prefix, t))
continue;
r = mkdir_userns(t, mode, uid_shift);
if (r < 0)
return r;
}
return mkdir_userns(path, mode, uid_shift);
}
int mount_all(const char *dest,
MountSettingsMask mount_settings,
uid_t uid_shift,
@ -598,29 +545,33 @@ int mount_all(const char *dest,
PROC_READ_ONLY("/proc/irq"),
PROC_READ_ONLY("/proc/scsi"),
{ "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_IN_USERNS|MOUNT_MKDIR },
/* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
{ "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR },
{ "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR },
{ "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */
{ "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
{ "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "/usr/lib/os-release", "/run/host/usr/lib/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_MKDIR|MOUNT_TOUCH },
{ "/etc/os-release", "/run/host/etc/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_MKDIR|MOUNT_TOUCH },
#if HAVE_SELINUX
{ "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
{ "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
MOUNT_MKDIR }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
{ NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
{ NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
0 }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
#endif
};
@ -636,6 +587,7 @@ int mount_all(const char *dest,
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
_cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
struct stat source_st;
bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL);
if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS))
@ -661,10 +613,26 @@ int mount_all(const char *dest,
return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
if (r > 0)
continue;
/* Shortcut for optional bind mounts: if the source can't be found skip ahead to avoid creating
* empty and unused directories. */
if (!fatal && FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR) && FLAGS_SET(mount_table[k].flags, MS_BIND)) {
r = stat(mount_table[k].what, &source_st);
if (r < 0) {
if (errno == ENOENT)
continue;
return log_error_errno(errno, "Failed to stat %s: %m", mount_table[k].what);
}
}
}
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR)) {
r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH))
r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
else
r = mkdir_p_safe(dest, where, 0755, u, u, 0);
if (r < 0 && r != -EEXIST) {
if (fatal && r != -EROFS)
return log_error_errno(r, "Failed to create directory %s: %m", where);
@ -676,6 +644,14 @@ int mount_all(const char *dest,
if (r != -EROFS)
continue;
}
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH)) {
r = touch(where);
if (r < 0 && r != -EEXIST) {
if (fatal)
return log_error_errno(r, "Failed to create mount point %s: %m", where);
log_debug_errno(r, "Failed to create mount point %s: %m", where);
}
}
}
o = mount_table[k].options;

View File

@ -17,6 +17,7 @@ typedef enum MountSettingsMask {
MOUNT_ROOT_ONLY = 1 << 6, /* if set, only root mounts are mounted */
MOUNT_NON_ROOT_ONLY = 1 << 7, /* if set, only non-root mounts are mounted */
MOUNT_MKDIR = 1 << 8, /* if set, make directory to mount over first */
MOUNT_TOUCH = 1 << 9, /* if set, touch file to mount over first */
} MountSettingsMask;
typedef enum CustomMountType {

View File

@ -2931,7 +2931,8 @@ static int inner_child(
int kmsg_socket,
int rtnl_socket,
int master_pty_socket,
FDSet *fds) {
FDSet *fds,
char **os_release_pairs) {
_cleanup_free_ char *home = NULL;
char as_uuid[ID128_UUID_STRING_MAX];
@ -3190,7 +3191,7 @@ static int inner_child(
if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
return log_oom();
env_use = strv_env_merge(2, envp, arg_setenv);
env_use = strv_env_merge(3, envp, arg_setenv, os_release_pairs);
if (!env_use)
return log_oom();
@ -3316,6 +3317,7 @@ static int outer_child(
FDSet *fds,
int netns_fd) {
_cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -1;
const char *p;
pid_t pid;
@ -3337,6 +3339,10 @@ static int outer_child(
log_debug("Outer child is initializing.");
r = load_os_release_pairs_with_prefix("/", "container_host_", &os_release_pairs);
if (r < 0)
log_debug_errno(r, "Failed to read os-release from host for container, ignoring: %m");
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
@ -3602,7 +3608,7 @@ static int outer_child(
return log_error_errno(r, "Failed to join network namespace: %m");
}
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds);
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds, os_release_pairs);
if (r < 0)
_exit(EXIT_FAILURE);

View File

@ -701,6 +701,7 @@ static int install_chroot_dropin(
"[Service]\n",
IN_SET(type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME) ? "RootDirectory=" : "RootImage=", image_path, "\n"
"Environment=PORTABLE=", basename(image_path), "\n"
"BindReadOnlyPaths=-/etc/os-release:/run/host/etc/os-release /usr/lib/os-release:/run/host/usr/lib/os-release\n"
"LogExtraFields=PORTABLE=", basename(image_path), "\n",
NULL))

View File

@ -117,3 +117,33 @@ int load_os_release_pairs(const char *root, char ***ret) {
return load_env_file_pairs(f, p, ret);
}
int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) {
_cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL;
char **p, **q;
int r;
r = load_os_release_pairs(root, &os_release_pairs);
if (r < 0)
return r;
STRV_FOREACH_PAIR(p, q, os_release_pairs) {
char *line;
// We strictly return only the four main ID fields and ignore the rest
if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID"))
continue;
ascii_strlower(*p);
line = strjoin(prefix, *p, "=", *q);
if (!line)
return -ENOMEM;
r = strv_consume(&os_release_pairs_prefixed, line);
if (r < 0)
return r;
}
*ret = TAKE_PTR(os_release_pairs_prefixed);
return 0;
}

View File

@ -10,3 +10,4 @@ int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
int parse_os_release(const char *root, ...) _sentinel_;
int load_os_release_pairs(const char *root, char ***ret);
int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret);

View File

@ -16,7 +16,7 @@ test_create_image() {
mask_supporting_services
../create-busybox-container $initdir/testsuite-13.nc-container
initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip
initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip md5sum
)
}

View File

@ -60,6 +60,18 @@ function check_notification_socket {
systemd-nspawn --register=no -D /testsuite-13.nc-container -U /bin/sh -x -c "$_cmd"
}
function check_os_release {
local _cmd='. /tmp/os-release
if [ -n "${ID:+set}" ] && [ "${ID}" != "${container_host_id}" ]; then exit 1; fi
if [ -n "${VERSION_ID:+set}" ] && [ "${VERSION_ID}" != "${container_host_version_id}" ]; then exit 1; fi
if [ -n "${BUILD_ID:+set}" ] && [ "${BUILD_ID}" != "${container_host_build_id}" ]; then exit 1; fi
if [ -n "${VARIANT_ID:+set}" ] && [ "${VARIANT_ID}" != "${container_host_variant_id}" ]; then exit 1; fi
cd /tmp; (cd /run/host/usr/lib; md5sum os-release) | md5sum -c
'
systemd-nspawn --register=no -D /testsuite-13.nc-container --bind=/etc/os-release:/tmp/os-release /bin/sh -x -e -c "$_cmd"
}
function run {
if [[ "$1" = "yes" && "$is_v2_supported" = "no" ]]; then
printf "Unified cgroup hierarchy is not supported. Skipping.\n" >&2
@ -144,6 +156,8 @@ check_norbind
check_notification_socket
check_os_release
for api_vfs_writable in yes no network; do
run no no $api_vfs_writable
run yes no $api_vfs_writable