From b53ede699cdc5233041a22591f18863fb3fe2672 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 8 Feb 2017 15:54:31 +0000 Subject: [PATCH] nspawn: Add support for sysroot pivoting (#5258) Add a new --pivot-root argument to systemd-nspawn, which specifies a directory to pivot to / inside the container; while the original / is pivoted to another specified directory (if provided). This adds support for booting container images which may contain several bootable sysroots, as is common with OSTree disk images. When these disk images are booted on real hardware, ostree-prepare-root is run in conjunction with sysroot.mount in the initramfs to achieve the same results. --- man/systemd-nspawn.xml | 21 +++++++ man/systemd.nspawn.xml | 9 +++ src/nspawn/nspawn-gperf.gperf | 1 + src/nspawn/nspawn-mount.c | 113 ++++++++++++++++++++++++++++++++++ src/nspawn/nspawn-mount.h | 3 + src/nspawn/nspawn-settings.c | 30 +++++++++ src/nspawn/nspawn-settings.h | 6 +- src/nspawn/nspawn.c | 29 +++++++++ 8 files changed, 211 insertions(+), 1 deletion(-) diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index f6b3f57fc7..5e671d21e8 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -335,6 +335,21 @@ an absolute path in the container's file system namespace. + + --pivot-root= + + Pivot the specified directory to / inside the container, and either unmount the + container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the + specified path will be pivoted to / and the old root will be unmounted; or a colon-separated pair + of new root path and pivot destination for the old root. The new root path will be pivoted to /, + and the old / will be pivoted to the other directory. Both paths must be absolute, and are resolved + in the container's file system namespace. + + This is for containers which have several bootable directories in them; for example, several + OSTree deployments. 
It emulates the behavior of the boot + loader and initial RAM disk which normally select which directory to mount as root and start the container's PID 1 in. + + @@ -1082,6 +1097,12 @@ # chcon system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -R /srv/container # systemd-nspawn -L system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -Z system_u:system_r:svirt_lxc_net_t:s0:c0,c1 -D /srv/container /bin/sh + + + Run a container with an OSTree deployment + + # systemd-nspawn -b -i ~/image.raw --pivot-root=/ostree/deploy/$OS/deploy/$CHECKSUM:/sysroot --bind=+/sysroot/ostree/deploy/$OS/var:/var + diff --git a/man/systemd.nspawn.xml b/man/systemd.nspawn.xml index 7143188356..4f3f052911 100644 --- a/man/systemd.nspawn.xml +++ b/man/systemd.nspawn.xml @@ -201,6 +201,15 @@ switch. + + PivotRoot= + + Selects a directory to pivot to / inside the container when starting up. + Takes a single path, or a pair of two paths separated by a colon. Both paths must be absolute, and are resolved + in the container's file system namespace. This corresponds to the --pivot-root= command line + switch. 
+ + Capability= DropCapability= diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index c0fa4bfa1f..e5fdf63162 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -26,6 +26,7 @@ Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory) +Exec.PivotRoot, config_parse_pivot_root, 0, 0 Exec.PrivateUsers, config_parse_private_users, 0, 0 Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready) Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 72c007f204..4b2838b752 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -1349,3 +1349,116 @@ fail: (void) rmdir(template); return r; } + +/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. 
*/ +int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) { + _cleanup_free_ char *root_new = NULL, *root_old = NULL; + const char *p = s; + int r; + + assert(pivot_root_new); + assert(pivot_root_old); + + r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + if (isempty(p)) + root_old = NULL; + else { + root_old = strdup(p); + if (!root_old) + return -ENOMEM; + } + + if (!path_is_absolute(root_new)) + return -EINVAL; + if (root_old && !path_is_absolute(root_old)) + return -EINVAL; + + free_and_replace(*pivot_root_new, root_new); + free_and_replace(*pivot_root_old, root_old); + + return 0; +} + +int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) { + _cleanup_free_ char *directory_pivot_root_new = NULL; + _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL; + char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX"; + bool remove_pivot_tmp = false; + int r; + + assert(directory); + + if (!pivot_root_new) + return 0; + + /* Pivot pivot_root_new to / and the existing / to pivot_root_old. + * If pivot_root_old is NULL, the existing / disappears. + * This requires a temporary directory, pivot_tmp, which is + * not a child of either. + * + * This is typically used for OSTree-style containers, where + * the root partition contains several sysroots which could be + * run. Normally, one would be chosen by the bootloader and + * pivoted to / by initramfs. + * + * For example, for an OSTree deployment, pivot_root_new + * would be: /ostree/deploy/$os/deploy/$checksum. Note that this + * code doesn’t do the /var mount which OSTree expects: use + * --bind +/sysroot/ostree/deploy/$os/var:/var for that. 
+ * + * So in the OSTree case, we’ll end up with something like: + * - directory = /tmp/nspawn-root-123456 + * - pivot_root_new = /ostree/deploy/os/deploy/123abc + * - pivot_root_old = /sysroot + * - directory_pivot_root_new = + * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc + * - pivot_tmp = /tmp/nspawn-pivot-123456 + * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot + * + * Requires all file systems at directory and below to be mounted + * MS_PRIVATE or MS_SLAVE so they can be moved. + */ + directory_pivot_root_new = prefix_root(directory, pivot_root_new); + + /* Remount directory_pivot_root_new to make it movable. */ + r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL); + if (r < 0) + goto done; + + if (pivot_root_old) { + if (!mkdtemp(pivot_tmp)) { + r = log_error_errno(errno, "Failed to create temporary directory: %m"); + goto done; + } + + remove_pivot_tmp = true; + pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old); + + r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + + r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + + r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + } else { + r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + } + +done: + if (remove_pivot_tmp) + (void) rmdir(pivot_tmp); + + return r; +} diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 6b33fbff57..2777d2169b 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -70,3 +70,6 @@ int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns, int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int 
setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); + +int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s); +int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old); diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index 22b74d88e4..5217d10665 100644 --- a/src/nspawn/nspawn-settings.c +++ b/src/nspawn/nspawn-settings.c @@ -90,6 +90,8 @@ Settings* settings_free(Settings *s) { strv_free(s->parameters); strv_free(s->environment); free(s->user); + free(s->pivot_root_new); + free(s->pivot_root_old); free(s->working_directory); strv_free(s->network_interfaces); @@ -237,6 +239,34 @@ int config_parse_id128( return 0; } +int config_parse_pivot_root( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue); + return 0; + } + + return 0; +} + int config_parse_bind( const char *unit, const char *filename, diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h index e9ea087191..021403258f 100644 --- a/src/nspawn/nspawn-settings.h +++ b/src/nspawn/nspawn-settings.h @@ -57,7 +57,8 @@ typedef enum SettingsMask { SETTING_WORKING_DIRECTORY = 1 << 12, SETTING_USERNS = 1 << 13, SETTING_NOTIFY_READY = 1 << 14, - _SETTINGS_MASK_ALL = (1 << 15) -1 + SETTING_PIVOT_ROOT = 1 << 15, + _SETTINGS_MASK_ALL = (1 << 16) -1 } SettingsMask; typedef struct Settings { @@ -72,6 +73,8 @@ typedef struct Settings { unsigned long personality; sd_id128_t 
machine_id; char *working_directory; + char *pivot_root_new; + char *pivot_root_old; UserNamespaceMode userns_mode; uid_t uid_shift, uid_range; bool notify_ready; @@ -109,6 +112,7 @@ int config_parse_capability(const char *unit, const char *filename, unsigned lin int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_pivot_root(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_overlay(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 5594b87efa..a8d33ad907 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -132,6 +132,8 @@ typedef enum LinkJournal { static char *arg_directory = NULL; static char *arg_template = NULL; static char *arg_chdir = NULL; +static 
char *arg_pivot_root_new = NULL; +static char *arg_pivot_root_old = NULL; static char *arg_user = NULL; static sd_id128_t arg_uuid = {}; static char *arg_machine = NULL; @@ -221,6 +223,8 @@ static void help(void) { " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. invoke init)\n" " --chdir=PATH Set working directory in the container\n" + " --pivot-root=PATH[:PATH]\n" + " Pivot root to given directory in the container\n" " -u --user=USER Run the command under specified user or uid\n" " -M --machine=NAME Set the machine name for the container\n" " --uuid=UUID Set a specific machine UUID for the container\n" @@ -427,6 +431,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_KILL_SIGNAL, ARG_SETTINGS, ARG_CHDIR, + ARG_PIVOT_ROOT, ARG_PRIVATE_USERS_CHOWN, ARG_NOTIFY_READY, ARG_ROOT_HASH, @@ -478,6 +483,7 @@ static int parse_argv(int argc, char *argv[]) { { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, { "settings", required_argument, NULL, ARG_SETTINGS }, { "chdir", required_argument, NULL, ARG_CHDIR }, + { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT }, { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY }, { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, {} @@ -1012,6 +1018,14 @@ static int parse_argv(int argc, char *argv[]) { arg_settings_mask |= SETTING_WORKING_DIRECTORY; break; + case ARG_PIVOT_ROOT: + r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg); + + arg_settings_mask |= SETTING_PIVOT_ROOT; + break; + case ARG_NOTIFY_READY: r = parse_boolean(optarg); if (r < 0) { @@ -2493,6 +2507,13 @@ static int outer_child( if (r < 0) return r; + r = setup_pivot_root( + directory, + arg_pivot_root_new, + arg_pivot_root_old); + if (r < 0) + return r; + r = setup_volatile( directory, arg_volatile_mode, @@ -2915,6 +2936,12 @@ static int load_settings(void) { 
settings->parameters = NULL; } + if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 && + settings->pivot_root_new) { + free_and_replace(arg_pivot_root_new, settings->pivot_root_new); + free_and_replace(arg_pivot_root_old, settings->pivot_root_old); + } + if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 && settings->working_directory) { free(arg_chdir); @@ -3915,6 +3942,8 @@ finish: free(arg_image); free(arg_machine); free(arg_user); + free(arg_pivot_root_new); + free(arg_pivot_root_old); free(arg_chdir); strv_free(arg_setenv); free(arg_network_bridge);