nspawn: implement container host os-release interface

This commit is contained in:
Luca Boccassi 2020-05-22 16:06:54 +01:00
parent 34e0d56ce2
commit e1bb4b0d1d
7 changed files with 95 additions and 15 deletions

View File

@ -545,29 +545,33 @@ int mount_all(const char *dest,
PROC_READ_ONLY("/proc/irq"),
PROC_READ_ONLY("/proc/scsi"),
{ "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_IN_USERNS|MOUNT_MKDIR },
/* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
{ "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR },
{ "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR },
{ "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
{ "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */
{ "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
{ "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
{ "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
MOUNT_FATAL|MOUNT_MKDIR },
{ "/usr/lib/os-release", "/run/host/usr/lib/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_FATAL|MOUNT_MKDIR|MOUNT_TOUCH },
{ "/etc/os-release", "/run/host/etc/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
MOUNT_MKDIR|MOUNT_TOUCH },
#if HAVE_SELINUX
{ "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
{ "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
MOUNT_MKDIR }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
{ NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
{ NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
0 }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
#endif
};
@ -583,6 +587,7 @@ int mount_all(const char *dest,
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
_cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
struct stat source_st;
bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL);
if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS))
@ -608,11 +613,26 @@ int mount_all(const char *dest,
return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
if (r > 0)
continue;
/* Shortcut for optional bind mounts: if the source can't be found skip ahead to avoid creating
* empty and unused directories. */
if (!fatal && FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR) && FLAGS_SET(mount_table[k].flags, MS_BIND)) {
r = stat(mount_table[k].what, &source_st);
if (r < 0) {
if (errno == ENOENT)
continue;
return log_error_errno(errno, "Failed to stat %s: %m", mount_table[k].what);
}
}
}
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_MKDIR)) {
uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
r = mkdir_p_safe(dest, where, 0755, u, u, 0);
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH))
r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
else
r = mkdir_p_safe(dest, where, 0755, u, u, 0);
if (r < 0 && r != -EEXIST) {
if (fatal && r != -EROFS)
return log_error_errno(r, "Failed to create directory %s: %m", where);
@ -624,6 +644,14 @@ int mount_all(const char *dest,
if (r != -EROFS)
continue;
}
if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH)) {
r = touch(where);
if (r < 0 && r != -EEXIST) {
if (fatal)
return log_error_errno(r, "Failed to create mount point %s: %m", where);
log_debug_errno(r, "Failed to create mount point %s: %m", where);
}
}
}
o = mount_table[k].options;

View File

@ -17,6 +17,7 @@ typedef enum MountSettingsMask {
MOUNT_ROOT_ONLY = 1 << 6, /* if set, only root mounts are mounted */
MOUNT_NON_ROOT_ONLY = 1 << 7, /* if set, only non-root mounts are mounted */
MOUNT_MKDIR = 1 << 8, /* if set, make directory to mount over first */
MOUNT_TOUCH = 1 << 9, /* if set, touch file to mount over first */
} MountSettingsMask;
typedef enum CustomMountType {

View File

@ -2894,7 +2894,8 @@ static int inner_child(
int kmsg_socket,
int rtnl_socket,
int master_pty_socket,
FDSet *fds) {
FDSet *fds,
char **os_release_pairs) {
_cleanup_free_ char *home = NULL;
char as_uuid[ID128_UUID_STRING_MAX];
@ -3153,7 +3154,7 @@ static int inner_child(
if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
return log_oom();
env_use = strv_env_merge(2, envp, arg_setenv);
env_use = strv_env_merge(3, envp, arg_setenv, os_release_pairs);
if (!env_use)
return log_oom();
@ -3279,6 +3280,7 @@ static int outer_child(
FDSet *fds,
int netns_fd) {
_cleanup_strv_free_ char **os_release_pairs = NULL;
_cleanup_close_ int fd = -1;
const char *p;
pid_t pid;
@ -3300,6 +3302,10 @@ static int outer_child(
log_debug("Outer child is initializing.");
r = load_os_release_pairs_with_prefix("/", "container_host_", &os_release_pairs);
if (r < 0)
log_debug_errno(r, "Failed to read os-release from host for container, ignoring: %m");
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
@ -3565,7 +3571,7 @@ static int outer_child(
return log_error_errno(r, "Failed to join network namespace: %m");
}
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds);
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds, os_release_pairs);
if (r < 0)
_exit(EXIT_FAILURE);

View File

@ -117,3 +117,33 @@ int load_os_release_pairs(const char *root, char ***ret) {
return load_env_file_pairs(f, p, ret);
}
int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) {
_cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL;
char **p, **q;
int r;
r = load_os_release_pairs(root, &os_release_pairs);
if (r < 0)
return r;
STRV_FOREACH_PAIR(p, q, os_release_pairs) {
char *line;
// We strictly return only the four main ID fields and ignore the rest
if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID"))
continue;
ascii_strlower(*p);
line = strjoin(prefix, *p, "=", *q);
if (!line)
return -ENOMEM;
r = strv_consume(&os_release_pairs_prefixed, line);
if (r < 0)
return r;
}
*ret = TAKE_PTR(os_release_pairs_prefixed);
return 0;
}

View File

@ -10,3 +10,4 @@ int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
int parse_os_release(const char *root, ...) _sentinel_;
int load_os_release_pairs(const char *root, char ***ret);
int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret);

View File

@ -16,7 +16,7 @@ test_create_image() {
mask_supporting_services
../create-busybox-container $initdir/testsuite-13.nc-container
initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip
initdir="$initdir/testsuite-13.nc-container" dracut_install nc ip md5sum
)
}

View File

@ -60,6 +60,18 @@ function check_notification_socket {
systemd-nspawn --register=no -D /testsuite-13.nc-container -U /bin/sh -x -c "$_cmd"
}
# Verifies the host os-release interface that nspawn exposes to containers.
#
# A host os-release file is bind-mounted to /tmp/os-release in the container. The script run
# inside the container sources it and exits with 1 if any of ID, VERSION_ID, BUILD_ID or
# VARIANT_ID is set there but differs from the matching container_host_* environment variable.
# It then checks that /run/host/usr/lib/os-release has the same md5sum as /tmp/os-release.
function check_os_release {
    local _cmd='. /tmp/os-release
if [ -n "${ID:+set}" ] && [ "${ID}" != "${container_host_id}" ]; then exit 1; fi
if [ -n "${VERSION_ID:+set}" ] && [ "${VERSION_ID}" != "${container_host_version_id}" ]; then exit 1; fi
if [ -n "${BUILD_ID:+set}" ] && [ "${BUILD_ID}" != "${container_host_build_id}" ]; then exit 1; fi
if [ -n "${VARIANT_ID:+set}" ] && [ "${VARIANT_ID}" != "${container_host_variant_id}" ]; then exit 1; fi
cd /tmp; (cd /run/host/usr/lib; md5sum os-release) | md5sum -c
'

    # /etc/os-release is optional per os-release(5); /usr/lib/os-release is the fallback location.
    # Bind whichever one is actually readable so the test does not fail on hosts that only ship
    # the fallback file.
    local _os_release_source="/etc/os-release"
    if [ ! -r "${_os_release_source}" ]; then
        _os_release_source="/usr/lib/os-release"
    fi

    systemd-nspawn --register=no -D /testsuite-13.nc-container --bind="${_os_release_source}:/tmp/os-release" /bin/sh -x -e -c "$_cmd"
}
function run {
if [[ "$1" = "yes" && "$is_v2_supported" = "no" ]]; then
printf "Unified cgroup hierarchy is not supported. Skipping.\n" >&2
@ -144,6 +156,8 @@ check_norbind
check_notification_socket
check_os_release
for api_vfs_writable in yes no network; do
run no no $api_vfs_writable
run yes no $api_vfs_writable