Merge pull request #16677 from poettering/statx-mntid

make use of new kernel 5.8 statx() mount id/mountpoint APIs
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2020-08-20 10:58:14 +02:00 committed by GitHub
commit 0cd9ccb654
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 49 deletions

3
TODO
View File

@ -19,9 +19,6 @@ Features:
* nss-systemd: also synthesize shadow records for users/groups
* make use of the new statx mountid and rootmount fields in path_get_mnt_id()
and fd_is_mount_point()
* nspawn: move "incoming mount" directory to /run/host, move "inaccessible"
nodes to /run/host, move notify socket (for sd_notify() between payload and
container manager)

View File

@ -8,38 +8,47 @@
#include <linux/stat.h>
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
/* Thew newest definition we are aware of (fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60; 5.8) */
#define STATX_DEFINITION { \
__u32 stx_mask; \
__u32 stx_blksize; \
__u64 stx_attributes; \
__u32 stx_nlink; \
__u32 stx_uid; \
__u32 stx_gid; \
__u16 stx_mode; \
__u16 __spare0[1]; \
__u64 stx_ino; \
__u64 stx_size; \
__u64 stx_blocks; \
__u64 stx_attributes_mask; \
struct statx_timestamp stx_atime; \
struct statx_timestamp stx_btime; \
struct statx_timestamp stx_ctime; \
struct statx_timestamp stx_mtime; \
__u32 stx_rdev_major; \
__u32 stx_rdev_minor; \
__u32 stx_dev_major; \
__u32 stx_dev_minor; \
__u64 stx_mnt_id; \
__u64 __spare2; \
__u64 __spare3[12]; \
}
#if !HAVE_STRUCT_STATX
struct statx_timestamp {
__s64 tv_sec;
__u32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 stx_attributes_mask;
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
struct statx STATX_DEFINITION;
#endif
/* Always define the newest version we are aware of as a distinct type, so that we can use it even if glibc
* defines an older definition */
struct new_statx STATX_DEFINITION;
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_BTIME
#define STATX_BTIME 0x00000800U
@ -49,3 +58,13 @@ struct statx {
#ifndef AT_STATX_DONT_SYNC
#define AT_STATX_DONT_SYNC 0x4000
#endif
/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */
#ifndef STATX_MNT_ID
#define STATX_MNT_ID 0x00001000U
#endif
/* 80340fe3605c0e78cfe496c3b3878be828cfdbfe (5.8) */
#ifndef STATX_ATTR_MOUNT_ROOT
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#endif

View File

@ -482,7 +482,7 @@ static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flag
# endif
}
# define statx missing_statx
# define statx(dfd, filename, flags, mask, buffer) missing_statx(dfd, filename, flags, mask, buffer)
#endif
#if !HAVE_SET_MEMPOLICY

View File

@ -8,6 +8,8 @@
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "missing_stat.h"
#include "missing_syscall.h"
#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
@ -32,6 +34,8 @@ int name_to_handle_at_loop(
_cleanup_free_ struct file_handle *h = NULL;
size_t n = ORIGINAL_MAX_HANDLE_SZ;
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
/* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
* buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
* start value, it is not an upper bound on the buffer size required.
@ -86,13 +90,16 @@ int name_to_handle_at_loop(
}
}
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
_cleanup_free_ char *fdinfo = NULL;
_cleanup_close_ int subfd = -1;
char *p;
int r;
assert(ret_mnt_id);
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
if ((flags & AT_EMPTY_PATH) && isempty(filename))
xsprintf(path, "/proc/self/fdinfo/%i", fd);
else {
@ -121,7 +128,7 @@ static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id
p += strspn(p, WHITESPACE);
p[strcspn(p, WHITESPACE)] = 0;
return safe_atoi(p, mnt_id);
return safe_atoi(p, ret_mnt_id);
}
int fd_is_mount_point(int fd, const char *filename, int flags) {
@ -129,33 +136,46 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
int mount_id = -1, mount_id_parent = -1;
bool nosupp = false, check_st_dev = true;
struct stat a, b;
struct statx sx
#if HAS_FEATURE_MEMORY_SANITIZER
= {}
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
#endif
;
int r;
assert(fd >= 0);
assert(filename);
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
/* First we will try the name_to_handle_at() syscall, which
* tells us the mount id and an opaque file "handle". It is
* not supported everywhere though (kernel compile-time
* option, not all file systems are hooked up). If it works
* the mount id is usually good enough to tell us whether
* something is a mount point.
/* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
* since kernel 5.8.
*
* If that didn't work we will try to read the mount id from
* /proc/self/fdinfo/<fd>. This is almost as good as
* name_to_handle_at(), however, does not return the
* opaque file handle. The opaque file handle is pretty useful
* to detect the root directory, which we should always
* consider a mount point. Hence we use this only as
* fallback. Exporting the mnt_id in fdinfo is a pretty recent
* If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
* an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
* all file systems are hooked up). If it works the mount id is usually good enough to tell us
* whether something is a mount point.
*
* If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
* as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
* handle is pretty useful to detect the root directory, which we should always consider a mount
* point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
* kernel addition.
*
* As last fallback we do traditional fstat() based st_dev
* comparisons. This is how things were traditionally done,
* but unionfs breaks this since it exposes file
* systems with a variety of st_dev reported. Also, btrfs
* subvolumes have different st_dev, even though they aren't
* real mounts of their own. */
* As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
* traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
* reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
* their own. */
if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
(flags & AT_EMPTY_PATH) |
AT_NO_AUTOMOUNT, 0, &sx) < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return -errno;
/* If statx() is not available or forbidden, fallback to name_to_handle_at() below */
} else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
@ -278,8 +298,29 @@ int path_is_mount_point(const char *t, const char *root, int flags) {
}
int path_get_mnt_id(const char *path, int *ret) {
union {
struct statx sx;
struct new_statx nsx;
} buf
#if HAS_FEATURE_MEMORY_SANITIZER
= {}
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
#endif
;
int r;
if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) {
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return -errno;
/* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
* privileges */
} else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
*ret = buf.nsx.stx_mnt_id;
return 0;
}
r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);