diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index ffe3d4cc64..dd44d5eb92 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -23,6 +23,7 @@ #include "macro.h" #include "mkdir.h" #include "mount-setup.h" +#include "mount-util.h" #include "mountpoint-util.h" #include "nulstr-util.h" #include "path-util.h" @@ -60,51 +61,51 @@ typedef struct MountPoint { #endif static const MountPoint mount_table[] = { - { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_STRICTATIME, + { "devtmpfs", "/dev", "devtmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_NOEXEC|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #if ENABLE_SMACK - { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV, mac_smack_use, MNT_FATAL }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, mac_smack_use, MNT_FATAL }, #endif - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, + { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, NULL, MNT_IN_CONTAINER }, #if ENABLE_SMACK - { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, mac_smack_use, MNT_FATAL }, #endif - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #if ENABLE_EFI - { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, is_efi_boot, MNT_NONE }, #endif - { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE, }, }; @@ -352,7 +353,7 @@ int mount_cgroup_controllers(void) { } /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */ - (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); + (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 36bc95a2df..34eb469fb8 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -130,9 +130,9 @@ static const MountEntry protect_home_read_only_table[] = { /* ProtectHome=tmpfs table */ static const MountEntry protect_home_tmpfs_table[] = { - { "/home", TMPFS, true, .read_only = true, .options_const = "mode=0755", .flags = MS_NODEV|MS_STRICTATIME }, - { "/run/user", TMPFS, true, .read_only = true, .options_const = "mode=0755", .flags = MS_NODEV|MS_STRICTATIME }, - { "/root", TMPFS, true, .read_only = true, .options_const = "mode=0700", .flags = MS_NODEV|MS_STRICTATIME }, + { "/home", TMPFS, true, .read_only = true, .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME }, + { "/run/user", TMPFS, true, .read_only = true, .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME }, + { "/root", TMPFS, true, .read_only = true, .options_const = "mode=0700" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME }, }; /* ProtectHome=yes table */ @@ -295,7 +295,7 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) { .mode = EMPTY_DIR, .ignore = false, .read_only = true, - .options_const = "mode=755", + .options_const = "mode=755" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, }; } @@ -341,7 +341,7 @@ static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, "Path is not absolute: %s", t->path); - str = strjoin("mode=0755,", t->options); + str = strjoin("mode=0755" TMPFS_LIMITS_TEMPORARY_FS ",", t->options); if (!str) return -ENOMEM; @@ -686,7 +686,7 @@ static int mount_private_dev(MountEntry *m) { dev = strjoina(temporary_mount, "/dev"); (void) mkdir(dev, 0755); - if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) { + if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755" TMPFS_LIMITS_DEV) < 0) { r = log_debug_errno(errno, "Failed to mount tmpfs on '%s': %m", dev); goto fail; } diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c index 08c7b76dba..bef2f27d3a 100644 --- a/src/fstab-generator/fstab-generator.c +++ b/src/fstab-generator/fstab-generator.c @@ -809,7 +809,7 @@ static int add_volatile_var(void) { "/var", NULL, "tmpfs", - "mode=0755", + "mode=0755" TMPFS_LIMITS_VAR, 0, 0, SPECIAL_LOCAL_FS_TARGET, diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index dcee851e93..a16ee5c60a 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -319,7 +319,7 @@ static int mount_legacy_cgns_supported( * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply * pass uid 0 and not uid_shift to tmpfs_patch_options(). */ - r = tmpfs_patch_options("mode=755", 0, selinux_apifs_context, &options); + r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, 0, selinux_apifs_context, &options); if (r < 0) return log_oom(); @@ -421,7 +421,7 @@ static int mount_legacy_cgns_unsupported( if (r == 0) { _cleanup_free_ char *options = NULL; - r = tmpfs_patch_options("mode=755", uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options); + r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options); if (r < 0) return log_oom(); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 59bd73d2ea..33cc19a425 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -602,25 +602,25 @@ int mount_all(const char *dest, MOUNT_IN_USERNS|MOUNT_MKDIR }, /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */ - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/tmp", "tmpfs", "mode=1777" TMPFS_LIMITS_TMP, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR }, - { "tmpfs", "/sys", "tmpfs", "mode=555", MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */ - { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */ - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, + { "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" TMPFS_LIMITS_DEV_SHM, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, + { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, #if HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, 0 }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 }, /* Then, make it r/o */ #endif }; @@ -1023,7 +1023,7 @@ static int setup_volatile_state(const char *directory, uid_t uid_shift, const ch if (r < 0 && errno != EEXIST) return log_error_errno(errno, "Failed to create %s: %m", directory); - options = "mode=755"; + options = "mode=755" TMPFS_LIMITS_VOLATILE_STATE; r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); if (r < 0) return log_oom(); @@ -1068,7 +1068,7 @@ static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char if (!mkdtemp(template)) return log_error_errno(errno, "Failed to create temporary directory: %m"); - options = "mode=755"; + options = "mode=755" TMPFS_LIMITS_ROOTFS; r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); if (r < 0) goto fail; @@ -1135,7 +1135,7 @@ static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const if (!mkdtemp(template)) return log_error_errno(errno, "Failed to create temporary directory: %m"); - options = "mode=755"; + options = "mode=755" TMPFS_LIMITS_ROOTFS; r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); if (r < 0) goto finish; diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 06fddacf16..024787912b 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -6,6 +6,23 @@ #include "macro.h" +/* 4MB for contents of regular files, 64k inodes for directories, symbolic links and device specials, + using large storage array systems as a baseline */ +#define TMPFS_LIMITS_DEV ",size=4m,nr_inodes=64k" +/* Very little, if any use expected */ +#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k" +#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST +#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST +/* 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25% + translates to 1M inodes */ +#define TMPFS_LIMITS_TMP ",size=10%,nr_inodes=400k" +#define TMPFS_LIMITS_DEV_SHM TMPFS_LIMITS_TMP +#define TMPFS_LIMITS_RUN TMPFS_LIMITS_TMP +#define TMPFS_LIMITS_TEMPORARY_FS TMPFS_LIMITS_TMP +#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m" +#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR +#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR + int repeat_unmount(const char *path, int flags); int umount_recursive(const char *target, int flags); int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist); diff --git a/src/volatile-root/volatile-root.c b/src/volatile-root/volatile-root.c index af78a87d6f..e55864d6cc 100644 --- a/src/volatile-root/volatile-root.c +++ b/src/volatile-root/volatile-root.c @@ -29,7 +29,7 @@ static int make_volatile(const char *path) { if (r < 0) return log_error_errno(r, "Couldn't generate volatile sysroot directory: %m"); - r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/volatile-sysroot", "tmpfs", MS_STRICTATIME, "mode=755"); + r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/volatile-sysroot", "tmpfs", MS_STRICTATIME, "mode=755" TMPFS_LIMITS_ROOTFS); if (r < 0) goto finish_rmdir; @@ -80,7 +80,7 @@ static int make_overlay(const char *path) { if (r < 0) return log_error_errno(r, "Couldn't create overlay sysroot directory: %m"); - r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/overlay-sysroot", "tmpfs", MS_STRICTATIME, "mode=755"); + r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/overlay-sysroot", "tmpfs", MS_STRICTATIME, "mode=755" TMPFS_LIMITS_ROOTFS); if (r < 0) goto finish; diff --git a/units/tmp.mount b/units/tmp.mount index 27bd0f235c..7066e52261 100644 --- a/units/tmp.mount +++ b/units/tmp.mount @@ -22,4 +22,4 @@ After=swap.target What=tmpfs Where=/tmp Type=tmpfs -Options=mode=1777,strictatime,nosuid,nodev +Options=mode=1777,strictatime,nosuid,nodev,size=10%,nr_inodes=400k