Merge pull request #11243 from poettering/nspawn-root-overlay
add systemd-nspawn --volatile=overlay support, as well as the same for host systems
This commit is contained in:
commit
cc5fc36aec
|
@ -137,10 +137,13 @@
|
||||||
enables fully state-less boots where the vendor-supplied OS is used as shipped, with only default
|
enables fully state-less boots where the vendor-supplied OS is used as shipped, with only default
|
||||||
configuration and no stored state in effect, as <filename>/etc</filename> and <filename>/var</filename> (as
|
configuration and no stored state in effect, as <filename>/etc</filename> and <filename>/var</filename> (as
|
||||||
well as all other resources shipped in the root file system) are reset at boot and lost on shutdown. If this
|
well as all other resources shipped in the root file system) are reset at boot and lost on shutdown. If this
|
||||||
setting is set to <literal>state</literal> the root file system is mounted as usual, however
|
setting is set to <literal>state</literal> the root file system is mounted read-only, however
|
||||||
<filename>/var</filename> is mounted as a volatile memory file system (<literal>tmpfs</literal>), so that the
|
<filename>/var</filename> is mounted as a volatile memory file system (<literal>tmpfs</literal>), so that the
|
||||||
system boots up with the normal configuration applied, but all state reset at boot and lost at shutdown. For details,
|
system boots up with the normal configuration applied, but all state reset at boot and lost at shutdown. If
|
||||||
see
|
this setting is set to <literal>overlay</literal> the root file system is set up as an
|
||||||
|
<literal>overlayfs</literal> mount combining the read-only root directory with a writable
|
||||||
|
<literal>tmpfs</literal>, so that no modifications are made to disk, but the file system may be modified
|
||||||
|
nonetheless with all changes being lost at reboot. For details, see
|
||||||
<citerefentry><refentrytitle>systemd-volatile-root.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>
|
<citerefentry><refentrytitle>systemd-volatile-root.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>
|
||||||
and
|
and
|
||||||
<citerefentry><refentrytitle>systemd-fstab-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
|
<citerefentry><refentrytitle>systemd-fstab-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
|
||||||
|
|
|
@ -175,19 +175,25 @@
|
||||||
lost at shutdown, as <filename>/etc</filename> and <filename>/var</filename> will be served from the (initially
|
lost at shutdown, as <filename>/etc</filename> and <filename>/var</filename> will be served from the (initially
|
||||||
unpopulated) volatile memory file system.</para>
|
unpopulated) volatile memory file system.</para>
|
||||||
|
|
||||||
<para>If set to <option>state</option> the generator will leave the root
|
<para>If set to <option>state</option> the generator will leave the root directory mount point unaltered,
|
||||||
directory mount point unaltered, however will mount a <literal>tmpfs</literal> file system to
|
however will mount a <literal>tmpfs</literal> file system to <filename>/var</filename>. In this mode the normal
|
||||||
<filename>/var</filename>. In this mode the normal system configuration (i.e. the contents of
|
system configuration (i.e. the contents of <literal>/etc</literal>) is in effect (and may be modified during
|
||||||
<literal>/etc</literal>) is in effect (and may be modified during system runtime), however the system state
|
system runtime), however the system state (i.e. the contents of <literal>/var</literal>) is reset at boot and
|
||||||
(i.e. the contents of <literal>/var</literal>) is reset at boot and lost at shutdown.</para>
|
lost at shutdown.</para>
|
||||||
|
|
||||||
|
<para>If this setting is set to <literal>overlay</literal> the root file system is set up as an
|
||||||
|
<literal>overlayfs</literal> mount combining the read-only root directory with a writable
|
||||||
|
<literal>tmpfs</literal>, so that no modifications are made to disk, but the file system may be modified
|
||||||
|
nonetheless with all changes being lost at reboot.</para>
|
||||||
|
|
||||||
<para>Note that in none of these modes the root directory, <filename>/etc</filename>, <filename>/var</filename>
|
<para>Note that in none of these modes the root directory, <filename>/etc</filename>, <filename>/var</filename>
|
||||||
or any other resources stored in the root file system are physically removed. It's thus safe to boot a system
|
or any other resources stored in the root file system are physically removed. It's thus safe to boot a system
|
||||||
that is normally operated in non-volatile mode temporarily into volatile mode, without losing data.</para>
|
that is normally operated in non-volatile mode temporarily into volatile mode, without losing data.</para>
|
||||||
|
|
||||||
<para>Note that enabling this setting will only work correctly on operating systems that can boot up with only
|
<para>Note that with the exception of <literal>overlay</literal> mode, enabling this setting will only work
|
||||||
<filename>/usr</filename> mounted, and are able to automatically populate <filename>/etc</filename>, and also
|
correctly on operating systems that can boot up with only <filename>/usr</filename> mounted, and are able to
|
||||||
<filename>/var</filename> in case of <literal>systemd.volatile=yes</literal>.</para></listitem>
|
automatically populate <filename>/etc</filename>, and also <filename>/var</filename> in case of
|
||||||
|
<literal>systemd.volatile=yes</literal>.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
|
@ -167,9 +167,9 @@
|
||||||
template path refers to the root of a <literal>btrfs</literal> subvolume, in which case a simple copy-on-write
|
template path refers to the root of a <literal>btrfs</literal> subvolume, in which case a simple copy-on-write
|
||||||
snapshot is taken, and populating the root directory is instant. If the specified template path does not refer
|
snapshot is taken, and populating the root directory is instant. If the specified template path does not refer
|
||||||
to the root of a <literal>btrfs</literal> subvolume (or not even to a <literal>btrfs</literal> file system at
|
to the root of a <literal>btrfs</literal> subvolume (or not even to a <literal>btrfs</literal> file system at
|
||||||
all), the tree is copied (though possibly in a copy-on-write scheme — if the file system supports that), which
|
all), the tree is copied (though possibly in a 'reflink' copy-on-write scheme — if the file system supports
|
||||||
can be substantially more time-consuming. May not be specified together with <option>--image=</option> or
|
that), which can be substantially more time-consuming. May not be specified together with
|
||||||
<option>--ephemeral</option>.</para>
|
<option>--image=</option> or <option>--ephemeral</option>.</para>
|
||||||
|
|
||||||
<para>Note that this switch leaves host name, machine ID and
|
<para>Note that this switch leaves host name, machine ID and
|
||||||
all other settings that could identify the instance
|
all other settings that could identify the instance
|
||||||
|
@ -183,9 +183,16 @@
|
||||||
<listitem><para>If specified, the container is run with a temporary snapshot of its file system that is removed
|
<listitem><para>If specified, the container is run with a temporary snapshot of its file system that is removed
|
||||||
immediately when the container terminates. May not be specified together with
|
immediately when the container terminates. May not be specified together with
|
||||||
<option>--template=</option>.</para>
|
<option>--template=</option>.</para>
|
||||||
<para>Note that this switch leaves host name, machine ID and
|
<para>Note that this switch leaves host name, machine ID and all other settings that could identify the
|
||||||
all other settings that could identify the instance
|
instance unmodified. Please note that — as with <option>--template=</option> — taking the temporary snapshot is
|
||||||
unmodified.</para></listitem>
|
more efficient on file systems that support subvolume snapshots or 'reflinks' natively (<literal>btrfs</literal>
|
||||||
|
or new <literal>xfs</literal>) than on more traditional file systems that do not
|
||||||
|
(<literal>ext4</literal>).</para>
|
||||||
|
|
||||||
|
<para>With this option no modifications of the container image are retained. Use
|
||||||
|
<option>--volatile=</option> (described below) for other mechanisms to restrict persistency of
|
||||||
|
container images during runtime.</para>
|
||||||
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
@ -899,8 +906,12 @@
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--read-only</option></term>
|
<term><option>--read-only</option></term>
|
||||||
|
|
||||||
<listitem><para>Mount the root file system read-only for the
|
<listitem><para>Mount the container's root file system (and any other file systems contained in the container
|
||||||
container.</para></listitem>
|
image) read-only. This has no effect on additional mounts made with <option>--bind=</option>,
|
||||||
|
<option>--tmpfs=</option> and similar options. This mode is implied if the container image file or directory is
|
||||||
|
marked read-only itself. It is also implied if <option>--volatile=</option> is used. In this case the container
|
||||||
|
image on disk is strictly read-only, while changes are permitted but kept non-persistently in memory only. For
|
||||||
|
further details, see below.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
@ -931,20 +942,16 @@
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--tmpfs=</option></term>
|
<term><option>--tmpfs=</option></term>
|
||||||
|
|
||||||
<listitem><para>Mount a tmpfs file system into the container.
|
<listitem><para>Mount a tmpfs file system into the container. Takes a single absolute path argument that
|
||||||
Takes a single absolute path argument that specifies where to
|
specifies where to mount the tmpfs instance to (in which case the directory access mode will be chosen as 0755,
|
||||||
mount the tmpfs instance to (in which case the directory
|
owned by root/root), or optionally a colon-separated pair of path and mount option string that is used for
|
||||||
access mode will be chosen as 0755, owned by root/root), or
|
mounting (in which case the kernel default for access mode and owner will be chosen, unless otherwise
|
||||||
optionally a colon-separated pair of path and mount option
|
specified). Backslash escapes are interpreted in the path, so <literal>\:</literal> may be used to embed colons
|
||||||
string that is used for mounting (in which case the kernel
|
in the path.</para>
|
||||||
default for access mode and owner will be chosen, unless
|
|
||||||
otherwise specified). This option is particularly useful for
|
<para>Note that this option cannot be used to replace the root file system of the container with a temporary
|
||||||
mounting directories such as <filename>/var</filename> as
|
file system. However, the <option>--volatile=</option> option described below provides similar
|
||||||
tmpfs, to allow state-less systems, in particular when
|
functionality, with a focus on implementing stateless operating system images.</para></listitem>
|
||||||
combined with <option>--read-only</option>.
|
|
||||||
Backslash escapes are interpreted in the path, so
|
|
||||||
<literal>\:</literal> may be used to embed colons in the path.
|
|
||||||
</para></listitem>
|
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
@ -1002,7 +1009,11 @@
|
||||||
be on the same file system as the top-most directory
|
be on the same file system as the top-most directory
|
||||||
tree). Also note that the <literal>lowerdir=</literal> mount
|
tree). Also note that the <literal>lowerdir=</literal> mount
|
||||||
option receives the paths to stack in the opposite order of
|
option receives the paths to stack in the opposite order of
|
||||||
this switch.</para></listitem>
|
this switch.</para>
|
||||||
|
|
||||||
|
<para>Note that this option cannot be used to replace the root file system of the container with an overlay
|
||||||
|
file system. However, the <option>--volatile=</option> option described below provides similar functionality,
|
||||||
|
with a focus on implementing stateless operating system images.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
@ -1074,33 +1085,49 @@
|
||||||
<term><option>--volatile</option></term>
|
<term><option>--volatile</option></term>
|
||||||
<term><option>--volatile=</option><replaceable>MODE</replaceable></term>
|
<term><option>--volatile=</option><replaceable>MODE</replaceable></term>
|
||||||
|
|
||||||
<listitem><para>Boots the container in volatile mode. When no
|
<listitem><para>Boots the container in volatile mode. When no mode parameter is passed or when mode is
|
||||||
mode parameter is passed or when mode is specified as
|
specified as <option>yes</option>, full volatile mode is enabled. This means the root directory is mounted as a
|
||||||
<option>yes</option>, full volatile mode is enabled. This
|
mostly unpopulated <literal>tmpfs</literal> instance, and <filename>/usr/</filename> from the OS tree is
|
||||||
means the root directory is mounted as a mostly unpopulated
|
mounted into it in read-only mode (the system thus starts up with read-only OS image, but pristine state and
|
||||||
<literal>tmpfs</literal> instance, and
|
configuration, any changes are lost on shutdown). When the mode parameter is specified as
|
||||||
<filename>/usr</filename> from the OS tree is mounted into it
|
<option>state</option>, the OS tree is mounted read-only, but <filename>/var/</filename> is mounted as a
|
||||||
in read-only mode (the system thus starts up with read-only OS
|
writable <literal>tmpfs</literal> instance into it (the system thus starts up with read-only OS resources and
|
||||||
image, but pristine state and configuration, any changes
|
configuration, but pristine state, and any changes to the latter are lost on shutdown). When the mode parameter
|
||||||
are lost on shutdown). When the mode parameter
|
is specified as <option>overlay</option> the read-only root file system is combined with a writable
|
||||||
is specified as <option>state</option>, the OS tree is
|
<literal>tmpfs</literal> instance through <literal>overlayfs</literal>, so that it appears as it normally
|
||||||
mounted read-only, but <filename>/var</filename> is mounted as
|
would, but any changes are applied to the temporary file system only and lost when the container is
|
||||||
a <literal>tmpfs</literal> instance into it (the system thus
|
terminated. When the mode parameter is specified as <option>no</option> (the default), the whole OS tree is
|
||||||
starts up with read-only OS resources and configuration, but
|
made available writable (unless <option>--read-only</option> is specified, see above).</para>
|
||||||
pristine state, and any changes to the latter are lost on
|
|
||||||
shutdown). When the mode parameter is specified as
|
<para>Note that if one of the volatile modes is chosen, its effect is limited to the root file system (or
|
||||||
<option>no</option> (the default), the whole OS tree is made
|
<filename>/var/</filename> in case of <option>state</option>), and any other mounts placed in the hierarchy are
|
||||||
available writable.</para>
|
unaffected — regardless of whether they are established automatically (e.g. the EFI system partition that might be
|
||||||
|
mounted to <filename>/efi/</filename> or <filename>/boot/</filename>) or explicitly (e.g. through an additional
|
||||||
|
command line option such as <option>--bind=</option>, see above). This means, even if
|
||||||
|
<option>--volatile=overlay</option> is used changes to <filename>/efi/</filename> or
|
||||||
|
<filename>/boot/</filename> are prohibited in case such a partition exists in the container image operated on,
|
||||||
|
and even if <option>--volatile=state</option> is used the hypothetical file <filename>/etc/foobar</filename> is
|
||||||
|
potentially writable if <option>--bind=/etc/foobar</option> is used to mount it from outside the read-only
|
||||||
|
container <filename>/etc</filename> directory.</para>
|
||||||
|
|
||||||
|
<para>The <option>--ephemeral</option> option is closely related to this setting, and provides similar
|
||||||
|
behaviour by making a temporary, ephemeral copy of the whole OS image and executing that. For further details,
|
||||||
|
see above.</para>
|
||||||
|
|
||||||
|
<para>The <option>--tmpfs=</option> and <option>--overlay=</option> options provide similar functionality, but
|
||||||
|
for specific sub-directories of the OS image only. For details, see above.</para>
|
||||||
|
|
||||||
<para>This option provides similar functionality for containers as the <literal>systemd.volatile=</literal>
|
<para>This option provides similar functionality for containers as the <literal>systemd.volatile=</literal>
|
||||||
kernel command line switch provides for host systems. See
|
kernel command line switch provides for host systems. See
|
||||||
<citerefentry><refentrytitle>kernel-command-line</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
|
<citerefentry><refentrytitle>kernel-command-line</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
|
||||||
details.</para>
|
details.</para>
|
||||||
|
|
||||||
<para>Note that enabling this setting will only work correctly with operating systems in the container that can
|
<para>Note that setting this option to <option>yes</option> or <option>state</option> will only work correctly
|
||||||
boot up with only <filename>/usr</filename> mounted, and are able to automatically populate
|
with operating systems in the container that can boot up with only <filename>/usr</filename> mounted, and are
|
||||||
<filename>/var</filename>, and also <filename>/etc</filename> in case of
|
able to automatically populate <filename>/var</filename>, and also <filename>/etc</filename> in case of
|
||||||
<literal>--volatile=yes</literal>.</para></listitem>
|
<literal>--volatile=yes</literal>. The <option>overlay</option> option does not require any particular
|
||||||
|
preparations in the OS, but do note that <literal>overlayfs</literal> behaviour differs from regular file
|
||||||
|
systems in a number of ways, and hence compatibility is limited.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
|
|
|
@ -743,7 +743,7 @@ int copy_file_fd_full(
|
||||||
|
|
||||||
r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata);
|
r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata);
|
||||||
|
|
||||||
(void) copy_times(fdf, fdt);
|
(void) copy_times(fdf, fdt, copy_flags);
|
||||||
(void) copy_xattr(fdf, fdt);
|
(void) copy_xattr(fdf, fdt);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
@ -849,10 +849,9 @@ int copy_file_atomic_full(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int copy_times(int fdf, int fdt) {
|
int copy_times(int fdf, int fdt, CopyFlags flags) {
|
||||||
struct timespec ut[2];
|
struct timespec ut[2];
|
||||||
struct stat st;
|
struct stat st;
|
||||||
usec_t crtime = 0;
|
|
||||||
|
|
||||||
assert(fdf >= 0);
|
assert(fdf >= 0);
|
||||||
assert(fdt >= 0);
|
assert(fdt >= 0);
|
||||||
|
@ -866,8 +865,12 @@ int copy_times(int fdf, int fdt) {
|
||||||
if (futimens(fdt, ut) < 0)
|
if (futimens(fdt, ut) < 0)
|
||||||
return -errno;
|
return -errno;
|
||||||
|
|
||||||
if (fd_getcrtime(fdf, &crtime) >= 0)
|
if (FLAGS_SET(flags, COPY_CRTIME)) {
|
||||||
(void) fd_setcrtime(fdt, crtime);
|
usec_t crtime;
|
||||||
|
|
||||||
|
if (fd_getcrtime(fdf, &crtime) >= 0)
|
||||||
|
(void) fd_setcrtime(fdt, crtime);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ typedef enum CopyFlags {
|
||||||
COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */
|
COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */
|
||||||
COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */
|
COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */
|
||||||
COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */
|
COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */
|
||||||
|
COPY_CRTIME = 1 << 5, /* Generate a user.crtime_usec xattr off the source crtime if there is one, on copying */
|
||||||
} CopyFlags;
|
} CopyFlags;
|
||||||
|
|
||||||
typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);
|
typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);
|
||||||
|
@ -57,5 +58,5 @@ static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags cop
|
||||||
return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL);
|
return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
int copy_times(int fdf, int fdt);
|
int copy_times(int fdf, int fdt, CopyFlags flags);
|
||||||
int copy_xattr(int fdf, int fdt);
|
int copy_xattr(int fdf, int fdt);
|
||||||
|
|
|
@ -494,7 +494,7 @@ static int copy_file_with_version_check(const char *from, const char *to, bool f
|
||||||
return log_error_errno(r, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
|
return log_error_errno(r, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) copy_times(fd_from, fd_to);
|
(void) copy_times(fd_from, fd_to, 0);
|
||||||
|
|
||||||
if (fsync(fd_to) < 0) {
|
if (fsync(fd_to) < 0) {
|
||||||
(void) unlink_noerrno(t);
|
(void) unlink_noerrno(t);
|
||||||
|
|
|
@ -722,10 +722,11 @@ static int add_sysroot_usr_mount(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static int add_volatile_root(void) {
|
static int add_volatile_root(void) {
|
||||||
/* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is
|
|
||||||
* requested, leaving only /usr from the root mount inside. */
|
|
||||||
|
|
||||||
if (arg_volatile_mode != VOLATILE_YES)
|
/* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is
|
||||||
|
* requested (or as an overlayfs), leaving only /usr from the root mount inside. */
|
||||||
|
|
||||||
|
if (!IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return generator_add_symlink(arg_dest, SPECIAL_INITRD_ROOT_FS_TARGET, "requires",
|
return generator_add_symlink(arg_dest, SPECIAL_INITRD_ROOT_FS_TARGET, "requires",
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "efivars.h"
|
#include "efivars.h"
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
#include "fileio.h"
|
#include "fileio.h"
|
||||||
|
#include "fs-util.h"
|
||||||
#include "fstab-util.h"
|
#include "fstab-util.h"
|
||||||
#include "generator.h"
|
#include "generator.h"
|
||||||
#include "gpt.h"
|
#include "gpt.h"
|
||||||
|
@ -533,7 +534,7 @@ static int add_root_rw(DissectedPartition *p) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int open_parent(dev_t devnum, int *ret) {
|
static int open_parent_devno(dev_t devnum, int *ret) {
|
||||||
_cleanup_(sd_device_unrefp) sd_device *d = NULL;
|
_cleanup_(sd_device_unrefp) sd_device *d = NULL;
|
||||||
const char *name, *devtype, *node;
|
const char *name, *devtype, *node;
|
||||||
sd_device *parent;
|
sd_device *parent;
|
||||||
|
@ -601,7 +602,7 @@ static int enumerate_partitions(dev_t devnum) {
|
||||||
_cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
|
_cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
|
||||||
int r, k;
|
int r, k;
|
||||||
|
|
||||||
r = open_parent(devnum, &fd);
|
r = open_parent_devno(devnum, &fd);
|
||||||
if (r <= 0)
|
if (r <= 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
@ -763,8 +764,25 @@ static int add_mounts(void) {
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to determine block device of /usr file system: %m");
|
return log_error_errno(r, "Failed to determine block device of /usr file system: %m");
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
log_debug("Neither root nor /usr file system are on a (single) block device.");
|
_cleanup_free_ char *p = NULL;
|
||||||
return 0;
|
mode_t m;
|
||||||
|
|
||||||
|
/* If the root mount has been replaced by some form of volatile file system (overlayfs), the
|
||||||
|
* original root block device node is symlinked in /run/systemd/volatile-root. Let's read that
|
||||||
|
* here. */
|
||||||
|
r = readlink_malloc("/run/systemd/volatile-root", &p);
|
||||||
|
if (r == -ENOENT) {
|
||||||
|
log_debug("Neither root nor /usr file system are on a (single) block device.");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m");
|
||||||
|
|
||||||
|
r = device_path_parse_major_minor(p, &m, &devno);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to parse major/minor device node: %m");
|
||||||
|
if (!S_ISBLK(m))
|
||||||
|
return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Volatile root device is of wrong type.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -223,7 +223,7 @@ static int raw_export_process(RawExport *e) {
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
if (r >= 0) {
|
if (r >= 0) {
|
||||||
(void) copy_times(e->input_fd, e->output_fd);
|
(void) copy_times(e->input_fd, e->output_fd, COPY_CRTIME);
|
||||||
(void) copy_xattr(e->input_fd, e->output_fd);
|
(void) copy_xattr(e->input_fd, e->output_fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -215,7 +215,7 @@ static int raw_import_finish(RawImport *i) {
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
if (S_ISREG(i->st.st_mode)) {
|
if (S_ISREG(i->st.st_mode)) {
|
||||||
(void) copy_times(i->input_fd, i->output_fd);
|
(void) copy_times(i->input_fd, i->output_fd, COPY_CRTIME);
|
||||||
(void) copy_xattr(i->input_fd, i->output_fd);
|
(void) copy_xattr(i->input_fd, i->output_fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -368,7 +368,7 @@ static int raw_pull_make_local_copy(RawPull *i) {
|
||||||
return log_error_errno(r, "Failed to make writable copy of image: %m");
|
return log_error_errno(r, "Failed to make writable copy of image: %m");
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) copy_times(i->raw_job->disk_fd, dfd);
|
(void) copy_times(i->raw_job->disk_fd, dfd, COPY_CRTIME);
|
||||||
(void) copy_xattr(i->raw_job->disk_fd, dfd);
|
(void) copy_xattr(i->raw_job->disk_fd, dfd);
|
||||||
|
|
||||||
dfd = safe_close(dfd);
|
dfd = safe_close(dfd);
|
||||||
|
|
|
@ -212,6 +212,8 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only)
|
||||||
|
|
||||||
if (!path_is_absolute(destination))
|
if (!path_is_absolute(destination))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
if (empty_or_root(destination))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
|
m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
|
||||||
if (!m)
|
if (!m)
|
||||||
|
@ -251,6 +253,8 @@ int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
|
||||||
|
|
||||||
if (!path_is_absolute(path))
|
if (!path_is_absolute(path))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
if (empty_or_root(path))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
|
m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
|
||||||
if (!m)
|
if (!m)
|
||||||
|
@ -310,6 +314,9 @@ int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_onl
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (empty_or_root(destination))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
|
m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
|
||||||
if (!m)
|
if (!m)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
@ -849,9 +856,8 @@ int mount_custom(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int setup_volatile_state(
|
static int setup_volatile_state(
|
||||||
const char *directory,
|
const char *directory,
|
||||||
VolatileMode mode,
|
|
||||||
bool userns, uid_t uid_shift, uid_t uid_range,
|
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||||
const char *selinux_apifs_context) {
|
const char *selinux_apifs_context) {
|
||||||
|
|
||||||
|
@ -861,11 +867,7 @@ int setup_volatile_state(
|
||||||
|
|
||||||
assert(directory);
|
assert(directory);
|
||||||
|
|
||||||
if (mode != VOLATILE_STATE)
|
/* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* --volatile=state means we simply overmount /var
|
|
||||||
with a tmpfs, and the rest read-only. */
|
|
||||||
|
|
||||||
r = bind_remount_recursive(directory, true, NULL);
|
r = bind_remount_recursive(directory, true, NULL);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
@ -886,9 +888,8 @@ int setup_volatile_state(
|
||||||
return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
|
return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
int setup_volatile(
|
static int setup_volatile_yes(
|
||||||
const char *directory,
|
const char *directory,
|
||||||
VolatileMode mode,
|
|
||||||
bool userns, uid_t uid_shift, uid_t uid_range,
|
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||||
const char *selinux_apifs_context) {
|
const char *selinux_apifs_context) {
|
||||||
|
|
||||||
|
@ -900,11 +901,8 @@ int setup_volatile(
|
||||||
|
|
||||||
assert(directory);
|
assert(directory);
|
||||||
|
|
||||||
if (mode != VOLATILE_YES)
|
/* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and that
|
||||||
return 0;
|
read-only. */
|
||||||
|
|
||||||
/* --volatile=yes means we mount a tmpfs to the root dir, and
|
|
||||||
the original /usr to use inside it, and that read-only. */
|
|
||||||
|
|
||||||
if (!mkdtemp(template))
|
if (!mkdtemp(template))
|
||||||
return log_error_errno(errno, "Failed to create temporary directory: %m");
|
return log_error_errno(errno, "Failed to create temporary directory: %m");
|
||||||
|
@ -912,7 +910,7 @@ int setup_volatile(
|
||||||
options = "mode=755";
|
options = "mode=755";
|
||||||
r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
|
r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_oom();
|
goto fail;
|
||||||
if (r > 0)
|
if (r > 0)
|
||||||
options = buf;
|
options = buf;
|
||||||
|
|
||||||
|
@ -961,6 +959,93 @@ fail:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int setup_volatile_overlay(
|
||||||
|
const char *directory,
|
||||||
|
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||||
|
const char *selinux_apifs_context) {
|
||||||
|
|
||||||
|
_cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
|
||||||
|
char template[] = "/tmp/nspawn-volatile-XXXXXX";
|
||||||
|
const char *upper, *work, *options;
|
||||||
|
bool tmpfs_mounted = false;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(directory);
|
||||||
|
|
||||||
|
/* --volatile=overlay means we mount an overlayfs to the root dir. */
|
||||||
|
|
||||||
|
if (!mkdtemp(template))
|
||||||
|
return log_error_errno(errno, "Failed to create temporary directory: %m");
|
||||||
|
|
||||||
|
options = "mode=755";
|
||||||
|
r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
if (r > 0)
|
||||||
|
options = buf;
|
||||||
|
|
||||||
|
r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
tmpfs_mounted = true;
|
||||||
|
|
||||||
|
upper = strjoina(template, "/upper");
|
||||||
|
work = strjoina(template, "/work");
|
||||||
|
|
||||||
|
if (mkdir(upper, 0755) < 0) {
|
||||||
|
r = log_error_errno(errno, "Failed to create %s: %m", upper);
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
if (mkdir(work, 0755) < 0) {
|
||||||
|
r = log_error_errno(errno, "Failed to create %s: %m", work);
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
|
||||||
|
* that the kernel allows us to do that without going through some mount point rearrangements. */
|
||||||
|
|
||||||
|
escaped_directory = shell_escape(directory, ",:");
|
||||||
|
escaped_upper = shell_escape(upper, ",:");
|
||||||
|
escaped_work = shell_escape(work, ",:");
|
||||||
|
if (!escaped_directory || !escaped_upper || !escaped_work) {
|
||||||
|
r = -ENOMEM;
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
|
||||||
|
r = mount_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
|
||||||
|
|
||||||
|
finish:
|
||||||
|
if (tmpfs_mounted)
|
||||||
|
(void) umount_verbose(template);
|
||||||
|
|
||||||
|
(void) rmdir(template);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
int setup_volatile_mode(
|
||||||
|
const char *directory,
|
||||||
|
VolatileMode mode,
|
||||||
|
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||||
|
const char *selinux_apifs_context) {
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
|
||||||
|
case VOLATILE_YES:
|
||||||
|
return setup_volatile_yes(directory, userns, uid_shift, uid_range, selinux_apifs_context);
|
||||||
|
|
||||||
|
case VOLATILE_STATE:
|
||||||
|
return setup_volatile_state(directory, userns, uid_shift, uid_range, selinux_apifs_context);
|
||||||
|
|
||||||
|
case VOLATILE_OVERLAY:
|
||||||
|
return setup_volatile_overlay(directory, userns, uid_shift, uid_range, selinux_apifs_context);
|
||||||
|
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
|
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
|
||||||
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
|
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
|
||||||
_cleanup_free_ char *root_new = NULL, *root_old = NULL;
|
_cleanup_free_ char *root_new = NULL, *root_old = NULL;
|
||||||
|
|
|
@ -49,8 +49,7 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
|
||||||
|
|
||||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||||
|
|
||||||
int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
int setup_volatile_mode(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||||
int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
|
||||||
|
|
||||||
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
|
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
|
||||||
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
|
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
|
||||||
|
|
|
@ -1308,6 +1308,9 @@ static int verify_arguments(void) {
|
||||||
if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
|
if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
|
||||||
arg_kill_signal = SIGRTMIN+3;
|
arg_kill_signal = SIGRTMIN+3;
|
||||||
|
|
||||||
|
if (arg_volatile_mode != VOLATILE_NO) /* Make sure all file systems contained in the image are mounted read-only if we are in volatile mode */
|
||||||
|
arg_read_only = true;
|
||||||
|
|
||||||
if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0)
|
if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0)
|
||||||
/* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
|
/* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
|
||||||
* The latter is not technically a user session, but we don't need to labour the point. */
|
* The latter is not technically a user session, but we don't need to labour the point. */
|
||||||
|
@ -1334,6 +1337,12 @@ static int verify_arguments(void) {
|
||||||
if (arg_userns_chown && arg_read_only)
|
if (arg_userns_chown && arg_read_only)
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--read-only and --private-users-chown may not be combined.");
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--read-only and --private-users-chown may not be combined.");
|
||||||
|
|
||||||
|
/* We don't support --private-users-chown together with any of the volatile modes since we couldn't
|
||||||
|
* change the read-only part of the tree (i.e. /usr) anyway, or because it would trigger a massive
|
||||||
|
* copy-up (in case of overlay) making the entire exercise pointless. */
|
||||||
|
if (arg_userns_chown && arg_volatile_mode != VOLATILE_NO)
|
||||||
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-chown may not be combined.");
|
||||||
|
|
||||||
/* If --network-namespace-path is given with any other network-related option,
|
/* If --network-namespace-path is given with any other network-related option,
|
||||||
* we need to error out, to avoid conflicts between different network options. */
|
* we need to error out, to avoid conflicts between different network options. */
|
||||||
if (arg_network_namespace_path &&
|
if (arg_network_namespace_path &&
|
||||||
|
@ -1352,9 +1361,6 @@ static int verify_arguments(void) {
|
||||||
if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO))
|
if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO))
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write mounts.");
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write mounts.");
|
||||||
|
|
||||||
if (arg_volatile_mode != VOLATILE_NO && arg_read_only)
|
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
|
|
||||||
|
|
||||||
if (arg_expose_ports && !arg_private_network)
|
if (arg_expose_ports && !arg_private_network)
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking.");
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking.");
|
||||||
|
|
||||||
|
@ -1420,6 +1426,10 @@ static const char *timezone_from_path(const char *path) {
|
||||||
"/usr/share/zoneinfo/");
|
"/usr/share/zoneinfo/");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool etc_writable(void) {
|
||||||
|
return !arg_read_only || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY);
|
||||||
|
}
|
||||||
|
|
||||||
static int setup_timezone(const char *dest) {
|
static int setup_timezone(const char *dest) {
|
||||||
_cleanup_free_ char *p = NULL, *etc = NULL;
|
_cleanup_free_ char *p = NULL, *etc = NULL;
|
||||||
const char *where, *check;
|
const char *where, *check;
|
||||||
|
@ -1431,9 +1441,9 @@ static int setup_timezone(const char *dest) {
|
||||||
if (IN_SET(arg_timezone, TIMEZONE_AUTO, TIMEZONE_SYMLINK)) {
|
if (IN_SET(arg_timezone, TIMEZONE_AUTO, TIMEZONE_SYMLINK)) {
|
||||||
r = readlink_malloc("/etc/localtime", &p);
|
r = readlink_malloc("/etc/localtime", &p);
|
||||||
if (r == -ENOENT && arg_timezone == TIMEZONE_AUTO)
|
if (r == -ENOENT && arg_timezone == TIMEZONE_AUTO)
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_OFF : TIMEZONE_DELETE;
|
m = etc_writable() ? TIMEZONE_DELETE : TIMEZONE_OFF;
|
||||||
else if (r == -EINVAL && arg_timezone == TIMEZONE_AUTO) /* regular file? */
|
else if (r == -EINVAL && arg_timezone == TIMEZONE_AUTO) /* regular file? */
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_COPY;
|
m = etc_writable() ? TIMEZONE_COPY : TIMEZONE_BIND;
|
||||||
else if (r < 0) {
|
else if (r < 0) {
|
||||||
log_warning_errno(r, "Failed to read host's /etc/localtime symlink, not updating container timezone: %m");
|
log_warning_errno(r, "Failed to read host's /etc/localtime symlink, not updating container timezone: %m");
|
||||||
/* To handle warning, delete /etc/localtime and replace it with a symbolic link to a time zone data
|
/* To handle warning, delete /etc/localtime and replace it with a symbolic link to a time zone data
|
||||||
|
@ -1444,7 +1454,7 @@ static int setup_timezone(const char *dest) {
|
||||||
*/
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
} else if (arg_timezone == TIMEZONE_AUTO)
|
} else if (arg_timezone == TIMEZONE_AUTO)
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_SYMLINK;
|
m = etc_writable() ? TIMEZONE_SYMLINK : TIMEZONE_BIND;
|
||||||
else
|
else
|
||||||
m = arg_timezone;
|
m = arg_timezone;
|
||||||
} else
|
} else
|
||||||
|
@ -1606,11 +1616,11 @@ static int setup_resolv_conf(const char *dest) {
|
||||||
if (arg_private_network)
|
if (arg_private_network)
|
||||||
m = RESOLV_CONF_OFF;
|
m = RESOLV_CONF_OFF;
|
||||||
else if (have_resolv_conf(STATIC_RESOLV_CONF) > 0 && resolved_listening() > 0)
|
else if (have_resolv_conf(STATIC_RESOLV_CONF) > 0 && resolved_listening() > 0)
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_BIND_STATIC : RESOLV_CONF_COPY_STATIC;
|
m = etc_writable() ? RESOLV_CONF_COPY_STATIC : RESOLV_CONF_BIND_STATIC;
|
||||||
else if (have_resolv_conf("/etc/resolv.conf") > 0)
|
else if (have_resolv_conf("/etc/resolv.conf") > 0)
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_BIND_HOST : RESOLV_CONF_COPY_HOST;
|
m = etc_writable() ? RESOLV_CONF_COPY_HOST : RESOLV_CONF_BIND_HOST;
|
||||||
else
|
else
|
||||||
m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_OFF : RESOLV_CONF_DELETE;
|
m = etc_writable() ? RESOLV_CONF_DELETE : RESOLV_CONF_OFF;
|
||||||
} else
|
} else
|
||||||
m = arg_resolv_conf;
|
m = arg_resolv_conf;
|
||||||
|
|
||||||
|
@ -2896,6 +2906,30 @@ static int outer_child(
|
||||||
"Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
|
"Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!dissected_image) {
|
||||||
|
/* Turn directory into bind mount */
|
||||||
|
r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = setup_pivot_root(
|
||||||
|
directory,
|
||||||
|
arg_pivot_root_new,
|
||||||
|
arg_pivot_root_old);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = setup_volatile_mode(
|
||||||
|
directory,
|
||||||
|
arg_volatile_mode,
|
||||||
|
arg_userns_mode != USER_NAMESPACE_NO,
|
||||||
|
arg_uid_shift,
|
||||||
|
arg_uid_range,
|
||||||
|
arg_selinux_context);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
if (dissected_image) {
|
if (dissected_image) {
|
||||||
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
|
/* Now we know the uid shift, let's now mount everything else that might be in the image. */
|
||||||
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
|
r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
|
||||||
|
@ -2921,38 +2955,6 @@ static int outer_child(
|
||||||
unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket);
|
unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Turn directory into bind mount */
|
|
||||||
r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = setup_pivot_root(
|
|
||||||
directory,
|
|
||||||
arg_pivot_root_new,
|
|
||||||
arg_pivot_root_old);
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = setup_volatile(
|
|
||||||
directory,
|
|
||||||
arg_volatile_mode,
|
|
||||||
arg_userns_mode != USER_NAMESPACE_NO,
|
|
||||||
arg_uid_shift,
|
|
||||||
arg_uid_range,
|
|
||||||
arg_selinux_context);
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = setup_volatile_state(
|
|
||||||
directory,
|
|
||||||
arg_volatile_mode,
|
|
||||||
arg_userns_mode != USER_NAMESPACE_NO,
|
|
||||||
arg_uid_shift,
|
|
||||||
arg_uid_range,
|
|
||||||
arg_selinux_context);
|
|
||||||
if (r < 0)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
/* Mark everything as shared so our mounts get propagated down. This is
|
/* Mark everything as shared so our mounts get propagated down. This is
|
||||||
* required to make new bind mounts available in systemd services
|
* required to make new bind mounts available in systemd services
|
||||||
* inside the container that create a new mount namespace.
|
* inside the container that create a new mount namespace.
|
||||||
|
@ -2971,7 +2973,7 @@ static int outer_child(
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
if (arg_read_only) {
|
if (arg_read_only && arg_volatile_mode == VOLATILE_NO) {
|
||||||
r = bind_remount_recursive(directory, true, NULL);
|
r = bind_remount_recursive(directory, true, NULL);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to make tree read-only: %m");
|
return log_error_errno(r, "Failed to make tree read-only: %m");
|
||||||
|
@ -4398,7 +4400,7 @@ int main(int argc, char *argv[]) {
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 0400 : 0600, FS_NOCOW_FL, COPY_REFLINK);
|
r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 0400 : 0600, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
r = log_error_errno(r, "Failed to copy image file: %m");
|
r = log_error_errno(r, "Failed to copy image file: %m");
|
||||||
goto finish;
|
goto finish;
|
||||||
|
|
|
@ -870,7 +870,7 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
|
||||||
case IMAGE_RAW:
|
case IMAGE_RAW:
|
||||||
new_path = strjoina("/var/lib/machines/", new_name, ".raw");
|
new_path = strjoina("/var/lib/machines/", new_name, ".raw");
|
||||||
|
|
||||||
r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
|
r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case IMAGE_BLOCK:
|
case IMAGE_BLOCK:
|
||||||
|
|
|
@ -12,33 +12,35 @@
|
||||||
|
|
||||||
int query_volatile_mode(VolatileMode *ret) {
|
int query_volatile_mode(VolatileMode *ret) {
|
||||||
_cleanup_free_ char *mode = NULL;
|
_cleanup_free_ char *mode = NULL;
|
||||||
VolatileMode m = VOLATILE_NO;
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode);
|
r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
if (r == 0)
|
if (r == 0) {
|
||||||
goto finish;
|
*ret = VOLATILE_NO;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (mode) {
|
if (mode) {
|
||||||
|
VolatileMode m;
|
||||||
|
|
||||||
m = volatile_mode_from_string(mode);
|
m = volatile_mode_from_string(mode);
|
||||||
if (m < 0)
|
if (m < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
*ret = m;
|
||||||
} else
|
} else
|
||||||
m = VOLATILE_YES;
|
*ret = VOLATILE_YES;
|
||||||
|
|
||||||
r = 1;
|
return 1;
|
||||||
|
|
||||||
finish:
|
|
||||||
*ret = m;
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
|
static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
|
||||||
[VOLATILE_NO] = "no",
|
[VOLATILE_NO] = "no",
|
||||||
[VOLATILE_YES] = "yes",
|
[VOLATILE_YES] = "yes",
|
||||||
[VOLATILE_STATE] = "state",
|
[VOLATILE_STATE] = "state",
|
||||||
|
[VOLATILE_OVERLAY] = "overlay",
|
||||||
};
|
};
|
||||||
|
|
||||||
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
|
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
|
||||||
|
|
|
@ -5,6 +5,7 @@ typedef enum VolatileMode {
|
||||||
VOLATILE_NO,
|
VOLATILE_NO,
|
||||||
VOLATILE_YES,
|
VOLATILE_YES,
|
||||||
VOLATILE_STATE,
|
VOLATILE_STATE,
|
||||||
|
VOLATILE_OVERLAY,
|
||||||
_VOLATILE_MODE_MAX,
|
_VOLATILE_MODE_MAX,
|
||||||
_VOLATILE_MODE_INVALID = -1
|
_VOLATILE_MODE_INVALID = -1
|
||||||
} VolatileMode;
|
} VolatileMode;
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
|
|
||||||
#include "alloc-util.h"
|
#include "alloc-util.h"
|
||||||
|
#include "blockdev-util.h"
|
||||||
|
#include "escape.h"
|
||||||
#include "fs-util.h"
|
#include "fs-util.h"
|
||||||
#include "main-func.h"
|
#include "main-func.h"
|
||||||
#include "mkdir.h"
|
#include "mkdir.h"
|
||||||
|
@ -17,20 +19,7 @@ static int make_volatile(const char *path) {
|
||||||
_cleanup_free_ char *old_usr = NULL;
|
_cleanup_free_ char *old_usr = NULL;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW);
|
assert(path);
|
||||||
if (r < 0)
|
|
||||||
return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path);
|
|
||||||
if (r == 0)
|
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
|
||||||
"%s is not a mount point.", path);
|
|
||||||
|
|
||||||
r = path_is_temporary_fs(path);
|
|
||||||
if (r < 0)
|
|
||||||
return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path);
|
|
||||||
if (r > 0) {
|
|
||||||
log_info("%s already is a temporary file system.", path);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr);
|
r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
@ -45,7 +34,7 @@ static int make_volatile(const char *path) {
|
||||||
goto finish_rmdir;
|
goto finish_rmdir;
|
||||||
|
|
||||||
if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) {
|
if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) {
|
||||||
r = -errno;
|
r = log_error_errno(errno, "Failed to create /usr directory: %m");
|
||||||
goto finish_umount;
|
goto finish_umount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,8 +43,10 @@ static int make_volatile(const char *path) {
|
||||||
goto finish_umount;
|
goto finish_umount;
|
||||||
|
|
||||||
r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL);
|
r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL);
|
||||||
if (r < 0)
|
if (r < 0) {
|
||||||
|
log_error_errno(r, "Failed to remount /usr read-only: %m");
|
||||||
goto finish_umount;
|
goto finish_umount;
|
||||||
|
}
|
||||||
|
|
||||||
r = umount_recursive(path, 0);
|
r = umount_recursive(path, 0);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
|
@ -64,7 +55,7 @@ static int make_volatile(const char *path) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
|
if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
|
||||||
log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC: %m", path);
|
log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC, ignoring: %m", path);
|
||||||
|
|
||||||
r = mount_verbose(LOG_ERR, "/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL);
|
r = mount_verbose(LOG_ERR, "/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL);
|
||||||
|
|
||||||
|
@ -77,9 +68,55 @@ finish_rmdir:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int make_overlay(const char *path) {
|
||||||
|
_cleanup_free_ char *escaped_path = NULL;
|
||||||
|
bool tmpfs_mounted = false;
|
||||||
|
const char *options = NULL;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(path);
|
||||||
|
|
||||||
|
r = mkdir_p("/run/systemd/overlay-sysroot", 0700);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Couldn't create overlay sysroot directory: %m");
|
||||||
|
|
||||||
|
r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/overlay-sysroot", "tmpfs", MS_STRICTATIME, "mode=755");
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
tmpfs_mounted = true;
|
||||||
|
|
||||||
|
if (mkdir("/run/systemd/overlay-sysroot/upper", 0755) < 0) {
|
||||||
|
r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/upper: %m");
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mkdir("/run/systemd/overlay-sysroot/work", 0755) < 0) {
|
||||||
|
r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/work: %m");
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
escaped_path = shell_escape(path, ",:");
|
||||||
|
if (!escaped_path) {
|
||||||
|
r = log_oom();
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
options = strjoina("lowerdir=", escaped_path, ",upperdir=/run/systemd/overlay-sysroot/upper,workdir=/run/systemd/overlay-sysroot/work");
|
||||||
|
r = mount_verbose(LOG_ERR, "overlay", path, "overlay", 0, options);
|
||||||
|
|
||||||
|
finish:
|
||||||
|
if (tmpfs_mounted)
|
||||||
|
(void) umount_verbose("/run/systemd/overlay-sysroot");
|
||||||
|
|
||||||
|
(void) rmdir("/run/systemd/overlay-sysroot");
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int run(int argc, char *argv[]) {
|
static int run(int argc, char *argv[]) {
|
||||||
VolatileMode m = _VOLATILE_MODE_INVALID;
|
VolatileMode m = _VOLATILE_MODE_INVALID;
|
||||||
const char *path;
|
const char *path;
|
||||||
|
dev_t devt;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
log_setup_service();
|
log_setup_service();
|
||||||
|
@ -94,10 +131,8 @@ static int run(int argc, char *argv[]) {
|
||||||
if (r == 0 && argc >= 2) {
|
if (r == 0 && argc >= 2) {
|
||||||
/* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. */
|
/* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. */
|
||||||
m = volatile_mode_from_string(argv[1]);
|
m = volatile_mode_from_string(argv[1]);
|
||||||
if (m < 0) {
|
if (m < 0)
|
||||||
log_error("Couldn't parse volatile mode: %s", argv[1]);
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Couldn't parse volatile mode: %s", argv[1]);
|
||||||
r = -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc < 3)
|
if (argc < 3)
|
||||||
|
@ -116,10 +151,47 @@ static int run(int argc, char *argv[]) {
|
||||||
"Directory cannot be the root directory.");
|
"Directory cannot be the root directory.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m != VOLATILE_YES)
|
if (!IN_SET(m, VOLATILE_YES, VOLATILE_OVERLAY))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return make_volatile(path);
|
r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path);
|
||||||
|
if (r == 0)
|
||||||
|
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "%s is not a mount point.", path);
|
||||||
|
|
||||||
|
r = path_is_temporary_fs(path);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path);
|
||||||
|
if (r > 0) {
|
||||||
|
log_info("%s already is a temporary file system.", path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We are about to replace the root directory with something else. Later code might want to know what we
|
||||||
|
* replaced here, hence let's save that information as a symlink we can later use. (This is particularly
|
||||||
|
* relevant for the overlayfs case where we'll fully obstruct the view onto the underlying device, hence
|
||||||
|
* querying the backing device node from the file system directly is no longer possible.) */
|
||||||
|
r = get_block_device_harder(path, &devt);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to determine device major/minor of %s: %m", path);
|
||||||
|
else if (r > 0) {
|
||||||
|
_cleanup_free_ char *dn = NULL;
|
||||||
|
|
||||||
|
r = device_path_make_major_minor(S_IFBLK, devt, &dn);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to format device node path: %m");
|
||||||
|
|
||||||
|
if (symlink(dn, "/run/systemd/volatile-root") < 0)
|
||||||
|
log_warning_errno(errno, "Failed to create symlink /run/systemd/volatile-root: %m");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m == VOLATILE_YES)
|
||||||
|
return make_volatile(path);
|
||||||
|
else {
|
||||||
|
assert(m == VOLATILE_OVERLAY);
|
||||||
|
return make_overlay(path);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_MAIN_FUNCTION(run);
|
DEFINE_MAIN_FUNCTION(run);
|
||||||
|
|
Loading…
Reference in a new issue