nspawn: Add support for sysroot pivoting (#5258)

Add a new --pivot-root argument to systemd-nspawn, which specifies a
directory to pivot to / inside the container; while the original / is
pivoted to another specified directory (if provided). This adds
support for booting container images which may contain several bootable
sysroots, as is common with OSTree disk images. When these disk images
are booted on real hardware, ostree-prepare-root is run in conjunction
with sysroot.mount in the initramfs to achieve the same results.
This commit is contained in:
Philip Withnall 2017-02-08 15:54:31 +00:00 committed by Lennart Poettering
parent 6a909d41e1
commit b53ede699c
8 changed files with 211 additions and 1 deletions

View File

@ -335,6 +335,21 @@
an absolute path in the container's file system namespace.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--pivot-root=</option></term>
<listitem><para>Pivot the specified directory to <filename>/</filename> inside the container, and either unmount the
container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the
specified path will be pivoted to <filename>/</filename> and the old root will be unmounted; or a colon-separated pair
of new root path and pivot destination for the old root. The new root path will be pivoted to <filename>/</filename>,
and the old <filename>/</filename> will be pivoted to the other directory. Both paths must be absolute, and are resolved
in the container's file system namespace.</para>
<para>This is for containers which have several bootable directories in them; for example, several
<ulink url="https://ostree.readthedocs.io/en/latest/">OSTree</ulink> deployments. It emulates the behavior of the boot
loader and initial RAM disk which normally select which directory to mount as root and start the container's PID 1 in.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-u</option></term>
<term><option>--user=</option></term>
@ -1082,6 +1097,12 @@
<programlisting># chcon system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -R /srv/container
# systemd-nspawn -L system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -Z system_u:system_r:svirt_lxc_net_t:s0:c0,c1 -D /srv/container /bin/sh</programlisting>
</example>
<example>
<title>Run a container with an OSTree deployment</title>
<programlisting># systemd-nspawn -b -i ~/image.raw --pivot-root=/ostree/deploy/$OS/deploy/$CHECKSUM:/sysroot --bind=+/sysroot/ostree/deploy/$OS/var:/var</programlisting>
</example>
</refsect1>
<refsect1>

View File

@ -201,6 +201,15 @@
switch.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>PivotRoot=</varname></term>
<listitem><para>Selects a directory to pivot to <filename>/</filename> inside the container when starting up.
Takes a single path, or a pair of two paths separated by a colon. Both paths must be absolute, and are resolved
in the container's file system namespace. This corresponds to the <option>--pivot-root=</option> command line
switch.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>Capability=</varname></term>
<term><varname>DropCapability=</varname></term>

View File

@ -26,6 +26,7 @@ Exec.KillSignal, config_parse_signal, 0, offsetof(Settings,
Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
Exec.PivotRoot, config_parse_pivot_root, 0, 0
Exec.PrivateUsers, config_parse_private_users, 0, 0
Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready)
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)

View File

@ -1349,3 +1349,116 @@ fail:
(void) rmdir(template);
return r;
}
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
_cleanup_free_ char *root_new = NULL, *root_old = NULL;
const char *p = s;
int r;
assert(pivot_root_new);
assert(pivot_root_old);
r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
if (r < 0)
return r;
if (r == 0)
return -EINVAL;
if (isempty(p))
root_old = NULL;
else {
root_old = strdup(p);
if (!root_old)
return -ENOMEM;
}
if (!path_is_absolute(root_new))
return -EINVAL;
if (root_old && !path_is_absolute(root_old))
return -EINVAL;
free_and_replace(*pivot_root_new, root_new);
free_and_replace(*pivot_root_old, root_old);
return 0;
}
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
_cleanup_free_ char *directory_pivot_root_new = NULL;
_cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
bool remove_pivot_tmp = false;
int r;
assert(directory);
if (!pivot_root_new)
return 0;
/* Pivot pivot_root_new to / and the existing / to pivot_root_old.
* If pivot_root_old is NULL, the existing / disappears.
* This requires a temporary directory, pivot_tmp, which is
* not a child of either.
*
* This is typically used for OSTree-style containers, where
* the root partition contains several sysroots which could be
* run. Normally, one would be chosen by the bootloader and
* pivoted to / by initramfs.
*
* For example, for an OSTree deployment, pivot_root_new
* would be: /ostree/deploy/$os/deploy/$checksum. Note that this
* code doesnt do the /var mount which OSTree expects: use
* --bind +/sysroot/ostree/deploy/$os/var:/var for that.
*
* So in the OSTree case, well end up with something like:
* - directory = /tmp/nspawn-root-123456
* - pivot_root_new = /ostree/deploy/os/deploy/123abc
* - pivot_root_old = /sysroot
* - directory_pivot_root_new =
* /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
* - pivot_tmp = /tmp/nspawn-pivot-123456
* - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
*
* Requires all file systems at directory and below to be mounted
* MS_PRIVATE or MS_SLAVE so they can be moved.
*/
directory_pivot_root_new = prefix_root(directory, pivot_root_new);
/* Remount directory_pivot_root_new to make it movable. */
r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
if (r < 0)
goto done;
if (pivot_root_old) {
if (!mkdtemp(pivot_tmp)) {
r = log_error_errno(errno, "Failed to create temporary directory: %m");
goto done;
}
remove_pivot_tmp = true;
pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
if (r < 0)
goto done;
r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
if (r < 0)
goto done;
r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
if (r < 0)
goto done;
} else {
r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
if (r < 0)
goto done;
}
done:
if (remove_pivot_tmp)
(void) rmdir(pivot_tmp);
return r;
}

View File

@ -70,3 +70,6 @@ int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns,
int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);

View File

@ -90,6 +90,8 @@ Settings* settings_free(Settings *s) {
strv_free(s->parameters);
strv_free(s->environment);
free(s->user);
free(s->pivot_root_new);
free(s->pivot_root_old);
free(s->working_directory);
strv_free(s->network_interfaces);
@ -237,6 +239,34 @@ int config_parse_id128(
return 0;
}
int config_parse_pivot_root(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
Settings *settings = data;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue);
return 0;
}
return 0;
}
int config_parse_bind(
const char *unit,
const char *filename,

View File

@ -57,7 +57,8 @@ typedef enum SettingsMask {
SETTING_WORKING_DIRECTORY = 1 << 12,
SETTING_USERNS = 1 << 13,
SETTING_NOTIFY_READY = 1 << 14,
_SETTINGS_MASK_ALL = (1 << 15) -1
SETTING_PIVOT_ROOT = 1 << 15,
_SETTINGS_MASK_ALL = (1 << 16) -1
} SettingsMask;
typedef struct Settings {
@ -72,6 +73,8 @@ typedef struct Settings {
unsigned long personality;
sd_id128_t machine_id;
char *working_directory;
char *pivot_root_new;
char *pivot_root_old;
UserNamespaceMode userns_mode;
uid_t uid_shift, uid_range;
bool notify_ready;
@ -109,6 +112,7 @@ int config_parse_capability(const char *unit, const char *filename, unsigned lin
int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_pivot_root(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_overlay(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);

View File

@ -132,6 +132,8 @@ typedef enum LinkJournal {
static char *arg_directory = NULL;
static char *arg_template = NULL;
static char *arg_chdir = NULL;
static char *arg_pivot_root_new = NULL;
static char *arg_pivot_root_old = NULL;
static char *arg_user = NULL;
static sd_id128_t arg_uuid = {};
static char *arg_machine = NULL;
@ -221,6 +223,8 @@ static void help(void) {
" -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
" -b --boot Boot up full system (i.e. invoke init)\n"
" --chdir=PATH Set working directory in the container\n"
" --pivot-root=PATH[:PATH]\n"
" Pivot root to given directory in the container\n"
" -u --user=USER Run the command under specified user or uid\n"
" -M --machine=NAME Set the machine name for the container\n"
" --uuid=UUID Set a specific machine UUID for the container\n"
@ -427,6 +431,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_KILL_SIGNAL,
ARG_SETTINGS,
ARG_CHDIR,
ARG_PIVOT_ROOT,
ARG_PRIVATE_USERS_CHOWN,
ARG_NOTIFY_READY,
ARG_ROOT_HASH,
@ -478,6 +483,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
{ "settings", required_argument, NULL, ARG_SETTINGS },
{ "chdir", required_argument, NULL, ARG_CHDIR },
{ "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
{ "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
{ "root-hash", required_argument, NULL, ARG_ROOT_HASH },
{}
@ -1012,6 +1018,14 @@ static int parse_argv(int argc, char *argv[]) {
arg_settings_mask |= SETTING_WORKING_DIRECTORY;
break;
case ARG_PIVOT_ROOT:
r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg);
if (r < 0)
return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg);
arg_settings_mask |= SETTING_PIVOT_ROOT;
break;
case ARG_NOTIFY_READY:
r = parse_boolean(optarg);
if (r < 0) {
@ -2493,6 +2507,13 @@ static int outer_child(
if (r < 0)
return r;
r = setup_pivot_root(
directory,
arg_pivot_root_new,
arg_pivot_root_old);
if (r < 0)
return r;
r = setup_volatile(
directory,
arg_volatile_mode,
@ -2915,6 +2936,12 @@ static int load_settings(void) {
settings->parameters = NULL;
}
if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 &&
settings->pivot_root_new) {
free_and_replace(arg_pivot_root_new, settings->pivot_root_new);
free_and_replace(arg_pivot_root_old, settings->pivot_root_old);
}
if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 &&
settings->working_directory) {
free(arg_chdir);
@ -3915,6 +3942,8 @@ finish:
free(arg_image);
free(arg_machine);
free(arg_user);
free(arg_pivot_root_new);
free(arg_pivot_root_old);
free(arg_chdir);
strv_free(arg_setenv);
free(arg_network_bridge);