de40a3037a
This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similarly to a .nspawn settings file, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionality is now also available via the new --inaccessible= cmdline switch, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (not exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional device nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. 
Not a useful concept, since the kernel will reduce the caps on execve() anyway. Hence not exposed through .nspawn/cmdline, as this is not very useful. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place can tools such as "buildah" use the OCI support in nspawn as a drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that, OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
63 lines
2.9 KiB
C
63 lines
2.9 KiB
C
/* SPDX-License-Identifier: LGPL-2.1+ */
|
|
#pragma once
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include "cgroup-util.h"
|
|
#include "volatile-util.h"
|
|
|
|
/* Bit flags controlling how mounts are set up. Each flag occupies its own bit, so several
 * flags can be combined with bitwise OR into one MountSettingsMask value. */
typedef enum MountSettingsMask {
        MOUNT_FATAL              = 1 << 0, /* if set, a mount error is considered fatal */
        MOUNT_USE_USERNS         = 1 << 1, /* if set, mounts are patched considering uid/gid shifts in a user namespace */
        MOUNT_IN_USERNS          = 1 << 2, /* if set, the mount is executed in the inner child, otherwise in the outer child */
        MOUNT_APPLY_APIVFS_RO    = 1 << 3, /* if set, /proc/sys, and /sys will be mounted read-only, otherwise read-write. */
        MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
                                              Works only if MOUNT_APPLY_APIVFS_RO is also set. */
        MOUNT_APPLY_TMPFS_TMP    = 1 << 5, /* if set, /tmp will be mounted as tmpfs */
} MountSettingsMask;
|
|
|
|
/* Discriminates the kind of mount a CustomMount entry describes (stored in CustomMount.type). */
typedef enum CustomMountType {
        CUSTOM_MOUNT_BIND,               /* bind mount */
        CUSTOM_MOUNT_TMPFS,              /* tmpfs mount */
        CUSTOM_MOUNT_OVERLAY,            /* overlayfs mount */
        CUSTOM_MOUNT_INACCESSIBLE,       /* "masks" a file or directory, i.e. makes it inaccessible */
        CUSTOM_MOUNT_ARBITRARY,          /* mount of an arbitrary file system; uses CustomMount.type_argument */
        _CUSTOM_MOUNT_TYPE_MAX,          /* one past the last valid type; not a type itself */
        _CUSTOM_MOUNT_TYPE_INVALID = -1  /* marker for "no valid type" */
} CustomMountType;
|
|
|
|
typedef struct CustomMount {
|
|
CustomMountType type;
|
|
bool read_only;
|
|
char *source; /* for overlayfs this is the upper directory */
|
|
char *destination;
|
|
char *options;
|
|
char *work_dir;
|
|
char **lower;
|
|
char *rm_rf_tmpdir;
|
|
char *type_argument; /* only for CUSTOM_MOUNT_ARBITRARY */
|
|
bool graceful;
|
|
bool in_userns;
|
|
} CustomMount;
|
|
|
|
/* Management of a CustomMount array 'l' holding 'n' entries.
 *
 * custom_mount_add() takes the array and its length by reference and the type of the new
 * entry, and returns a pointer to a CustomMount (presumably the new entry, NULL on failure
 * — confirm against the implementation).
 * custom_mount_free_all() releases an array of 'n' entries.
 * custom_mount_prepare_all() prepares all 'n' entries relative to 'dest' and returns an int
 * status (presumably negative errno-style on failure — confirm). */
CustomMount *custom_mount_add(CustomMount **l, size_t *n, CustomMountType t);
void custom_mount_free_all(CustomMount *l, size_t n);
int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n);
|
|
|
|
/* Parsers that take a textual specification 's' and operate on the CustomMount array
 * '*l' / '*n' (passed by reference, so presumably a new entry of the matching type is
 * appended — confirm against the implementation). The bind and overlay variants also take
 * the read-only flag for the mount. All return an int status (presumably negative
 * errno-style on failure — confirm). */
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s);
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
|
|
|
|
/* Mount set-up below 'dest', tuned by a MountSettingsMask bit mask.
 *
 * mount_all() additionally takes the UID shift and an SELinux context for API file systems;
 * mount_sysfs() takes only the destination and the mask (presumably it handles /sys —
 * confirm). Both return an int status (presumably negative errno-style on failure —
 * confirm against the implementation). */
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
|
|
|
|
/* Processes the 'n' CustomMount entries in 'mounts' relative to 'dest'.
 *
 * 'userns', 'uid_shift' and 'uid_range' carry the user namespace configuration and
 * 'selinux_apifs_context' an SELinux context. NOTE(review): 'in_userns' presumably selects
 * the entries whose CustomMount.in_userns matches — confirm against the implementation.
 * Returns an int status (presumably negative errno-style on failure — confirm). */
int mount_custom(
                const char *dest,
                CustomMount *mounts,
                size_t n,
                bool userns,
                uid_t uid_shift,
                uid_t uid_range,
                const char *selinux_apifs_context,
                bool in_userns);
|
|
|
|
/* Sets up the given VolatileMode for the tree at 'directory'.
 *
 * 'userns', 'uid_shift' and 'uid_range' carry the user namespace configuration and
 * 'selinux_apifs_context' an SELinux context. Returns an int status (presumably negative
 * errno-style on failure — confirm against the implementation). */
int setup_volatile_mode(
                const char *directory,
                VolatileMode mode,
                bool userns,
                uid_t uid_shift,
                uid_t uid_range,
                const char *selinux_apifs_context);
|
|
|
|
/* pivot_root handling.
 *
 * pivot_root_parse() takes a specification string 's' and two string out-parameters,
 * '*pivot_root_new' and '*pivot_root_old' (presumably filled with newly allocated strings
 * owned by the caller — confirm against the implementation).
 * setup_pivot_root() takes the container 'directory' and the two paths.
 * Both return an int status (presumably negative errno-style on failure — confirm). */
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
|
|
|
|
/* Takes a tmpfs mount option string 'options', a UID shift and an SELinux context, and
 * hands a result string back through '*ret' (presumably newly allocated and owned by the
 * caller — confirm against the implementation). Returns an int status (presumably negative
 * errno-style on failure — confirm). */
int tmpfs_patch_options(
                const char *options,
                uid_t uid_shift,
                const char *selinux_apifs_context,
                char **ret);
|