From ca194a2a0c822684ca201021eb69d4190f128b4c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 1 Sep 2020 18:42:01 +0200 Subject: [PATCH] stat-util: provide single fallback implementation of statx() This simplifies things quite a bit, and is reusable wherever we want to use statx() later on. Not sure why I didn't do it like this right from the beginning... --- src/basic/missing_stat.h | 30 ++++++ src/basic/stat-util.c | 57 +++++++++++ src/basic/stat-util.h | 3 + src/tmpfiles/tmpfiles.c | 200 ++++++++++++++------------------------- 4 files changed, 160 insertions(+), 130 deletions(-) diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h index a5bb2bfd02..7bdc8a7efa 100644 --- a/src/basic/missing_stat.h +++ b/src/basic/missing_stat.h @@ -49,6 +49,16 @@ struct statx STATX_DEFINITION; * defines an older definition */ struct new_statx STATX_DEFINITION; +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_SYNC_AS_STAT +#define AT_STATX_SYNC_AS_STAT 0x0000 +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_FORCE_SYNC +#define AT_STATX_FORCE_SYNC 0x2000 +#endif + /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ #ifndef AT_STATX_DONT_SYNC #define AT_STATX_DONT_SYNC 0x4000 @@ -64,11 +74,21 @@ struct new_statx STATX_DEFINITION; #define STATX_MODE 0x00000002U #endif +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_NLINK +#define STATX_NLINK 0x00000004U +#endif + /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ #ifndef STATX_UID #define STATX_UID 0x00000008U #endif +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_GID +#define STATX_GID 0x00000010U +#endif + /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ #ifndef STATX_ATIME #define STATX_ATIME 0x00000020U @@ -89,6 +109,16 @@ struct new_statx STATX_DEFINITION; #define STATX_INO 0x00000100U #endif +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_SIZE +#define STATX_SIZE 
0x00000200U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_BLOCKS +#define STATX_BLOCKS 0x00000400U +#endif + /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ #ifndef STATX_BTIME #define STATX_BTIME 0x00000800U diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c index 904584a985..574815bc43 100644 --- a/src/basic/stat-util.c +++ b/src/basic/stat-util.c @@ -15,6 +15,7 @@ #include "macro.h" #include "missing_fs.h" #include "missing_magic.h" +#include "missing_syscall.h" #include "parse-util.h" #include "stat-util.h" #include "string-util.h" @@ -413,3 +414,59 @@ bool stat_inode_unmodified(const struct stat *a, const struct stat *b) { a->st_ino == b->st_ino && (!(S_ISCHR(a->st_mode) || S_ISBLK(a->st_mode)) || a->st_rdev == b->st_rdev); /* if device node, also compare major/minor, because we can */ } + +int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx) { + static bool avoid_statx = false; + struct stat st; + + if (!avoid_statx) { + if (statx(dfd, path, flags, mask, sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EPERM) + return -errno; + + /* If statx() is not supported or if we see EPERM (which might indicate seccomp + * filtering or so), let's do a fallback.
Note that on EACCES we'll not fall back, + * since that is likely an indication of fs access issues, which we should + * propagate */ + } else + return 0; + + avoid_statx = true; + } + + /* Only do fallback if fstatat() supports the flag too, or if it's one of the sync flags, which are + * OK to ignore. The fallback below reports every field fstatat() returns, independent of 'mask'. */ + if ((flags & ~(AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW| + AT_STATX_SYNC_AS_STAT|AT_STATX_FORCE_SYNC|AT_STATX_DONT_SYNC)) != 0) + return -EOPNOTSUPP; + + if (fstatat(dfd, path, &st, flags & (AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW)) < 0) + return -errno; + + *sx = (struct statx) { + .stx_mask = STATX_TYPE|STATX_MODE| + STATX_NLINK|STATX_UID|STATX_GID| + STATX_ATIME|STATX_MTIME|STATX_CTIME| + STATX_INO|STATX_SIZE|STATX_BLOCKS, + .stx_blksize = st.st_blksize, + .stx_nlink = st.st_nlink, + .stx_uid = st.st_uid, + .stx_gid = st.st_gid, + .stx_mode = st.st_mode, + .stx_ino = st.st_ino, + .stx_size = st.st_size, + .stx_blocks = st.st_blocks, + .stx_rdev_major = major(st.st_rdev), + .stx_rdev_minor = minor(st.st_rdev), + .stx_dev_major = major(st.st_dev), + .stx_dev_minor = minor(st.st_dev), + .stx_atime.tv_sec = st.st_atim.tv_sec, + .stx_atime.tv_nsec = st.st_atim.tv_nsec, + .stx_mtime.tv_sec = st.st_mtim.tv_sec, + .stx_mtime.tv_nsec = st.st_mtim.tv_nsec, + .stx_ctime.tv_sec = st.st_ctim.tv_sec, + .stx_ctime.tv_nsec = st.st_ctim.tv_nsec, + }; + + return 0; +} diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h index b14451b4e7..26ecd635f1 100644 --- a/src/basic/stat-util.h +++ b/src/basic/stat-util.h @@ -10,6 +10,7 @@ #include #include "macro.h" +#include "missing_stat.h" int is_symlink(const char *path); int is_dir(const char *path, bool follow); @@ -92,6 +93,8 @@ int proc_mounted(void); bool stat_inode_unmodified(const struct stat *a, const struct stat *b); +int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx); + #if HAS_FEATURE_MEMORY_SANITIZER # warning "Explicitly initializing struct statx, to work
around msan limitation. Please remove as soon as msan has been updated to not require this." # define STRUCT_STATX_DEFINE(var) \ diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index 36e31d046c..aec8ddc904 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -517,7 +517,6 @@ static int dir_cleanup( int maxdepth, bool keep_this_level) { - static bool use_statx = true; bool deleted = false; struct dirent *dent; int r = 0; @@ -525,110 +524,73 @@ static int dir_cleanup( FOREACH_DIRENT_ALL(dent, d, break) { _cleanup_free_ char *sub_path = NULL; nsec_t atime_nsec, mtime_nsec, ctime_nsec, btime_nsec; - mode_t mode; - uid_t uid; if (dot_or_dot_dot(dent->d_name)) continue; - if (use_statx) { - /* If statx() is supported, use it. It's preferable over fstatat() since it tells us - * explicitly where we are looking at a mount point, for free as side - * information. Determing the same information without statx() is hard, see the - * complexity of path_is_mount_point(), and also much slower as it requires a numbre - * of syscalls instead of just one. Hence, when we have modern statx() we use it - * instead of fstat() and do proper mount point checks, while on older kernels's well - * do traditional st_dev based detection of mount points. - * - * Using statx() for detecting mount points also has the benfit that we handle weird - * file systems such as overlayfs better where each file is originating from a - * different st_dev. */ + /* If statx() is supported, use it. It's preferable over fstatat() since it tells us + * explicitly where we are looking at a mount point, for free as side information. Determining + * the same information without statx() is hard, see the complexity of path_is_mount_point(), + * and also much slower as it requires a number of syscalls instead of just one.
Hence, when + * we have modern statx() we use it instead of fstat() and do proper mount point checks, + * while on older kernels we'll do traditional st_dev based detection of mount points. + * + * Using statx() for detecting mount points also has the benefit that we handle weird file + * systems such as overlayfs better where each file is originating from a different + * st_dev. */ - STRUCT_STATX_DEFINE(sx); + STRUCT_STATX_DEFINE(sx); - if (statx(dirfd(d), dent->d_name, - AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, - STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME, - &sx) < 0) { - - if (errno == ENOENT) - continue; - if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EPERM) - use_statx = false; /* Not supported or blocked by seccomp or so */ - else { - /* FUSE, NFS mounts, SELinux might return EACCES */ - r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, - "statx(%s/%s) failed: %m", p, dent->d_name); - continue; - } - } else { - if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) { - /* Yay, we have the mount point API, use it */ - if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { - log_debug("Ignoring \"%s/%s\": different mount points.", p, dent->d_name); - continue; - } - } else { - /* So we have statx() but the STATX_ATTR_MOUNT_ROOT flag is not - * supported, fall back to traditional stx_dev checking. */ - if (sx.stx_dev_major != rootdev_major || - sx.stx_dev_minor != rootdev_minor) { - log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name); - continue; - } - } - - mode = sx.stx_mode; - uid = sx.stx_uid; - atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? load_statx_timestamp_nsec(&sx.stx_atime) : 0; - mtime_nsec = FLAGS_SET(sx.stx_mask, STATX_MTIME) ? load_statx_timestamp_nsec(&sx.stx_mtime) : 0; - ctime_nsec = FLAGS_SET(sx.stx_mask, STATX_CTIME) ? load_statx_timestamp_nsec(&sx.stx_ctime) : 0; - btime_nsec = FLAGS_SET(sx.stx_mask, STATX_BTIME) ?
load_statx_timestamp_nsec(&sx.stx_btime) : 0; - } + r = statx_fallback( + dirfd(d), dent->d_name, + AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, + STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME, + &sx); + if (r == -ENOENT) + continue; + if (r < 0) { + /* FUSE, NFS mounts, SELinux might return EACCES */ + r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, + "statx(%s/%s) failed: %m", p, dent->d_name); + continue; } - if (!use_statx) { - struct stat s; - - if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) { - if (errno == ENOENT) - continue; - - /* FUSE, NFS mounts, SELinux might return EACCES */ - r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, - "stat(%s/%s) failed: %m", p, dent->d_name); + if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) { + /* Yay, we have the mount point API, use it */ + if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) { + log_debug("Ignoring \"%s/%s\": different mount points.", p, dent->d_name); continue; } - - /* Stay on the same filesystem */ - if (major(s.st_dev) != rootdev_major || minor(s.st_dev) != rootdev_minor) { + } else { + /* So we might have statx() but the STATX_ATTR_MOUNT_ROOT flag is not supported, fall + * back to traditional stx_dev checking. */ + if (sx.stx_dev_major != rootdev_major || + sx.stx_dev_minor != rootdev_minor) { log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name); continue; } - mode = s.st_mode; - uid = s.st_uid; - atime_nsec = timespec_load_nsec(&s.st_atim); - mtime_nsec = timespec_load_nsec(&s.st_mtim); - ctime_nsec = timespec_load_nsec(&s.st_ctim); - btime_nsec = 0; - } + /* Try to detect bind mounts of the same filesystem instance; they do not differ in device + * major/minors. This type of query is not supported on all kernels or filesystem types + * though. 
*/ + if (S_ISDIR(sx.stx_mode)) { + int q; - /* Try to detect bind mounts of the same filesystem instance; they - * do not differ in device major/minors. This type of query is not - * supported on all kernels or filesystem types though. */ - if (S_ISDIR(mode)) { - int q; - - q = fd_is_mount_point(dirfd(d), dent->d_name, 0); - if (q < 0) - log_debug_errno(q, "Failed to determine whether \"%s/%s\" is a mount point, ignoring: %m", p, dent->d_name); - else if (q > 0) { - log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.", p, dent->d_name); - continue; + q = fd_is_mount_point(dirfd(d), dent->d_name, 0); + if (q < 0) + log_debug_errno(q, "Failed to determine whether \"%s/%s\" is a mount point, ignoring: %m", p, dent->d_name); + else if (q > 0) { + log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.", p, dent->d_name); + continue; + } } } + atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? load_statx_timestamp_nsec(&sx.stx_atime) : 0; + mtime_nsec = FLAGS_SET(sx.stx_mask, STATX_MTIME) ? load_statx_timestamp_nsec(&sx.stx_mtime) : 0; + ctime_nsec = FLAGS_SET(sx.stx_mask, STATX_CTIME) ? load_statx_timestamp_nsec(&sx.stx_ctime) : 0; + btime_nsec = FLAGS_SET(sx.stx_mask, STATX_BTIME) ? load_statx_timestamp_nsec(&sx.stx_btime) : 0; + sub_path = path_join(p, dent->d_name); if (!sub_path) { r = log_oom(); @@ -646,12 +608,12 @@ static int dir_cleanup( continue; } - if (S_ISDIR(mode)) { + if (S_ISDIR(sx.stx_mode)) { _cleanup_closedir_ DIR *sub_dir = NULL; if (mountpoint && streq(dent->d_name, "lost+found") && - uid == 0) { + sx.stx_uid == 0) { log_debug("Ignoring directory \"%s\".", sub_path); continue; } @@ -727,14 +689,14 @@ static int dir_cleanup( } else { /* Skip files for which the sticky bit is set. These are semantics we define, and are * unknown elsewhere. See XDG_RUNTIME_DIR specification for details. 
*/ - if (mode & S_ISVTX) { + if (sx.stx_mode & S_ISVTX) { log_debug("Skipping \"%s\": sticky bit set.", sub_path); continue; } if (mountpoint && - S_ISREG(mode) && - uid == 0 && + S_ISREG(sx.stx_mode) && + sx.stx_uid == 0 && STR_IN_SET(dent->d_name, ".journal", "aquota.user", @@ -744,13 +706,13 @@ static int dir_cleanup( } /* Ignore sockets that are listed in /proc/net/unix */ - if (S_ISSOCK(mode) && unix_socket_alive(sub_path)) { + if (S_ISSOCK(sx.stx_mode) && unix_socket_alive(sub_path)) { log_debug("Skipping \"%s\": live socket.", sub_path); continue; } /* Ignore device nodes */ - if (S_ISCHR(mode) || S_ISBLK(mode)) { + if (S_ISCHR(sx.stx_mode) || S_ISBLK(sx.stx_mode)) { log_debug("Skipping \"%s\": a device.", sub_path); continue; } @@ -2286,12 +2248,9 @@ static int remove_item(Item *i) { static int clean_item_instance(Item *i, const char* instance) { char timestamp[FORMAT_TIMESTAMP_MAX]; _cleanup_closedir_ DIR *d = NULL; - uint32_t dev_major, dev_minor; - nsec_t atime_nsec, mtime_nsec; STRUCT_STATX_DEFINE(sx); - int mountpoint = -1; + int mountpoint, r; usec_t cutoff, n; - uint64_t ino; assert(i); @@ -2314,53 +2273,34 @@ static int clean_item_instance(Item *i, const char* instance) { return log_error_errno(errno, "Failed to open directory %s: %m", instance); } - if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0) { - struct stat s; + r = statx_fallback(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx); + if (r < 0) + return log_error_errno(r, "statx(%s) failed: %m", instance); - if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) - return log_error_errno(errno, "statx(%s) failed: %m", i->path); - - if (fstat(dirfd(d), &s) < 0) - return log_error_errno(errno, "stat(%s) failed: %m", i->path); - - dev_major = major(s.st_dev); - dev_minor = minor(s.st_dev); - ino = s.st_ino; - atime_nsec = timespec_load_nsec(&s.st_atim); - mtime_nsec = timespec_load_nsec(&s.st_mtim); - } else { - 
- if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) - mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); - - dev_major = sx.stx_dev_major; - dev_minor = sx.stx_dev_minor; - ino = sx.stx_ino; - atime_nsec = load_statx_timestamp_nsec(&sx.stx_atime); - mtime_nsec = load_statx_timestamp_nsec(&sx.stx_mtime); - } - - if (mountpoint < 0) { + if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) + mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); + else { struct stat ps; if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0) return log_error_errno(errno, "stat(%s/..) failed: %m", i->path); mountpoint = - dev_major != major(ps.st_dev) || - dev_minor != minor(ps.st_dev) || - ino != ps.st_ino; + sx.stx_dev_major != major(ps.st_dev) || + sx.stx_dev_minor != minor(ps.st_dev) || + sx.stx_ino != ps.st_ino; } - log_debug("Cleanup threshold for %s \"%s\" is %s", mountpoint ? "mount point" : "directory", instance, format_timestamp_style(timestamp, sizeof(timestamp), cutoff, TIMESTAMP_US)); return dir_cleanup(i, instance, d, - atime_nsec, mtime_nsec, cutoff * NSEC_PER_USEC, - dev_major, dev_minor, mountpoint, + load_statx_timestamp_nsec(&sx.stx_atime), + load_statx_timestamp_nsec(&sx.stx_mtime), + cutoff * NSEC_PER_USEC, + sx.stx_dev_major, sx.stx_dev_minor, mountpoint, MAX_DEPTH, i->keep_first_level); }