Merge pull request #16877 from poettering/tmpfiles-statx

tmpfiles: use statx()
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2020-09-01 16:32:50 +02:00 committed by GitHub
commit f77d6ec953
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 263 additions and 70 deletions

View file

@ -50,13 +50,48 @@ struct statx STATX_DEFINITION;
struct new_statx STATX_DEFINITION;
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_BTIME
#define STATX_BTIME 0x00000800U
#ifndef AT_STATX_DONT_SYNC
#define AT_STATX_DONT_SYNC 0x4000
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef AT_STATX_DONT_SYNC
#define AT_STATX_DONT_SYNC 0x4000
#ifndef STATX_TYPE
#define STATX_TYPE 0x00000001U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_MODE
#define STATX_MODE 0x00000002U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_UID
#define STATX_UID 0x00000008U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_ATIME
#define STATX_ATIME 0x00000020U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_MTIME
#define STATX_MTIME 0x00000040U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_CTIME
#define STATX_CTIME 0x00000080U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_INO
#define STATX_INO 0x00000100U
#endif
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
#ifndef STATX_BTIME
#define STATX_BTIME 0x00000800U
#endif
/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */

View file

@ -244,12 +244,28 @@ struct timespec *timespec_store(struct timespec *ts, usec_t u) {
if (u == USEC_INFINITY ||
u / USEC_PER_SEC >= TIME_T_MAX) {
ts->tv_sec = (time_t) -1;
ts->tv_nsec = (long) -1;
ts->tv_nsec = -1L;
return ts;
}
ts->tv_sec = (time_t) (u / USEC_PER_SEC);
ts->tv_nsec = (long int) ((u % USEC_PER_SEC) * NSEC_PER_USEC);
ts->tv_nsec = (long) ((u % USEC_PER_SEC) * NSEC_PER_USEC);
return ts;
}
struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n) {
assert(ts);
if (n == NSEC_INFINITY ||
n / NSEC_PER_SEC >= TIME_T_MAX) {
ts->tv_sec = (time_t) -1;
ts->tv_nsec = -1L;
return ts;
}
ts->tv_sec = (time_t) (n / NSEC_PER_SEC);
ts->tv_nsec = (long) (n % NSEC_PER_SEC);
return ts;
}

View file

@ -112,6 +112,7 @@ usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock);
usec_t timespec_load(const struct timespec *ts) _pure_;
nsec_t timespec_load_nsec(const struct timespec *ts) _pure_;
struct timespec *timespec_store(struct timespec *ts, usec_t u);
struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n);
usec_t timeval_load(const struct timeval *tv) _pure_;
struct timeval *timeval_store(struct timeval *tv, usec_t u);

View file

@ -38,6 +38,8 @@
#include "log.h"
#include "macro.h"
#include "main-func.h"
#include "missing_stat.h"
#include "missing_syscall.h"
#include "mkdir.h"
#include "mount-util.h"
#include "mountpoint-util.h"
@ -490,49 +492,137 @@ static DIR* opendir_nomod(const char *path) {
return xopendirat_nomod(AT_FDCWD, path);
}
static inline nsec_t load_statx_timestamp_nsec(const struct statx_timestamp *ts) {
assert(ts);
if (ts->tv_sec < 0)
return NSEC_INFINITY;
if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC)
return NSEC_INFINITY;
return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
}
static int dir_cleanup(
Item *i,
const char *p,
DIR *d,
const struct stat *ds,
usec_t cutoff,
dev_t rootdev,
nsec_t self_atime_nsec,
nsec_t self_mtime_nsec,
nsec_t cutoff_nsec,
dev_t rootdev_major,
dev_t rootdev_minor,
bool mountpoint,
int maxdepth,
bool keep_this_level) {
struct dirent *dent;
static bool use_statx = true;
bool deleted = false;
struct dirent *dent;
int r = 0;
FOREACH_DIRENT_ALL(dent, d, break) {
struct stat s;
usec_t age;
_cleanup_free_ char *sub_path = NULL;
nsec_t atime_nsec, mtime_nsec, ctime_nsec, btime_nsec;
mode_t mode;
uid_t uid;
if (dot_or_dot_dot(dent->d_name))
continue;
if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) {
if (errno == ENOENT)
continue;
if (use_statx) {
/* If statx() is supported, use it. It's preferable over fstatat() since it tells us
* explicitly where we are looking at a mount point, for free as side
* information. Determing the same information without statx() is hard, see the
* complexity of path_is_mount_point(), and also much slower as it requires a numbre
* of syscalls instead of just one. Hence, when we have modern statx() we use it
* instead of fstat() and do proper mount point checks, while on older kernels's well
* do traditional st_dev based detection of mount points.
*
* Using statx() for detecting mount points also has the benfit that we handle weird
* file systems such as overlayfs better where each file is originating from a
* different st_dev. */
/* FUSE, NFS mounts, SELinux might return EACCES */
r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
"stat(%s/%s) failed: %m", p, dent->d_name);
continue;
struct statx sx
#if HAS_FEATURE_MEMORY_SANITIZER
= {}
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
#endif
;
if (statx(dirfd(d), dent->d_name,
AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT,
STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME,
&sx) < 0) {
if (errno == ENOENT)
continue;
if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EPERM)
use_statx = false; /* Not supported or blocked by seccomp or so */
else {
/* FUSE, NFS mounts, SELinux might return EACCES */
r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
"statx(%s/%s) failed: %m", p, dent->d_name);
continue;
}
} else {
if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) {
/* Yay, we have the mount point API, use it */
if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) {
log_debug("Ignoring \"%s/%s\": different mount points.", p, dent->d_name);
continue;
}
} else {
/* So we have statx() but the STATX_ATTR_MOUNT_ROOT flag is not
* supported, fall back to traditional stx_dev checking. */
if (sx.stx_dev_major != rootdev_major ||
sx.stx_dev_minor != rootdev_minor) {
log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
continue;
}
}
mode = sx.stx_mode;
uid = sx.stx_uid;
atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? load_statx_timestamp_nsec(&sx.stx_atime) : 0;
mtime_nsec = FLAGS_SET(sx.stx_mask, STATX_MTIME) ? load_statx_timestamp_nsec(&sx.stx_mtime) : 0;
ctime_nsec = FLAGS_SET(sx.stx_mask, STATX_CTIME) ? load_statx_timestamp_nsec(&sx.stx_ctime) : 0;
btime_nsec = FLAGS_SET(sx.stx_mask, STATX_BTIME) ? load_statx_timestamp_nsec(&sx.stx_btime) : 0;
}
}
/* Stay on the same filesystem */
if (s.st_dev != rootdev) {
log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
continue;
if (!use_statx) {
struct stat s;
if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) {
if (errno == ENOENT)
continue;
/* FUSE, NFS mounts, SELinux might return EACCES */
r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
"stat(%s/%s) failed: %m", p, dent->d_name);
continue;
}
/* Stay on the same filesystem */
if (major(s.st_dev) != rootdev_major || minor(s.st_dev) != rootdev_minor) {
log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
continue;
}
mode = s.st_mode;
uid = s.st_uid;
atime_nsec = timespec_load_nsec(&s.st_atim);
mtime_nsec = timespec_load_nsec(&s.st_mtim);
ctime_nsec = timespec_load_nsec(&s.st_ctim);
btime_nsec = 0;
}
/* Try to detect bind mounts of the same filesystem instance; they
* do not differ in device major/minors. This type of query is not
* supported on all kernels or filesystem types though. */
if (S_ISDIR(s.st_mode)) {
if (S_ISDIR(mode)) {
int q;
q = fd_is_mount_point(dirfd(d), dent->d_name, 0);
@ -561,12 +651,12 @@ static int dir_cleanup(
continue;
}
if (S_ISDIR(s.st_mode)) {
if (S_ISDIR(mode)) {
_cleanup_closedir_ DIR *sub_dir = NULL;
if (mountpoint &&
streq(dent->d_name, "lost+found") &&
s.st_uid == 0) {
uid == 0) {
log_debug("Ignoring directory \"%s\".", sub_path);
continue;
}
@ -589,7 +679,11 @@ static int dir_cleanup(
continue;
}
q = dir_cleanup(i, sub_path, sub_dir, &s, cutoff, rootdev, false, maxdepth-1, false);
q = dir_cleanup(i,
sub_path, sub_dir,
atime_nsec, mtime_nsec, cutoff_nsec,
rootdev_major, rootdev_minor,
false, maxdepth-1, false);
if (q < 0)
r = q;
}
@ -605,22 +699,28 @@ static int dir_cleanup(
}
/* Ignore ctime, we change it when deleting */
age = timespec_load(&s.st_mtim);
if (age >= cutoff) {
if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
/* Follows spelling in stat(1). */
log_debug("Directory \"%s\": modify time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
age = timespec_load(&s.st_atim);
if (age >= cutoff) {
if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
log_debug("Directory \"%s\": access time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
log_debug("Directory \"%s\": birth time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
@ -632,14 +732,14 @@ static int dir_cleanup(
} else {
/* Skip files for which the sticky bit is set. These are semantics we define, and are
* unknown elsewhere. See XDG_RUNTIME_DIR specification for details. */
if (s.st_mode & S_ISVTX) {
if (mode & S_ISVTX) {
log_debug("Skipping \"%s\": sticky bit set.", sub_path);
continue;
}
if (mountpoint &&
S_ISREG(s.st_mode) &&
s.st_uid == 0 &&
S_ISREG(mode) &&
uid == 0 &&
STR_IN_SET(dent->d_name,
".journal",
"aquota.user",
@ -649,13 +749,13 @@ static int dir_cleanup(
}
/* Ignore sockets that are listed in /proc/net/unix */
if (S_ISSOCK(s.st_mode) && unix_socket_alive(sub_path)) {
if (S_ISSOCK(mode) && unix_socket_alive(sub_path)) {
log_debug("Skipping \"%s\": live socket.", sub_path);
continue;
}
/* Ignore device nodes */
if (S_ISCHR(s.st_mode) || S_ISBLK(s.st_mode)) {
if (S_ISCHR(mode) || S_ISBLK(mode)) {
log_debug("Skipping \"%s\": a device.", sub_path);
continue;
}
@ -666,31 +766,36 @@ static int dir_cleanup(
continue;
}
age = timespec_load(&s.st_mtim);
if (age >= cutoff) {
if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
/* Follows spelling in stat(1). */
log_debug("File \"%s\": modify time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
age = timespec_load(&s.st_atim);
if (age >= cutoff) {
if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
log_debug("File \"%s\": access time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
age = timespec_load(&s.st_ctim);
if (age >= cutoff) {
if (ctime_nsec != NSEC_INFINITY && ctime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
log_debug("File \"%s\": change time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), age, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), ctime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
char a[FORMAT_TIMESTAMP_MAX];
log_debug("File \"%s\": birth time %s is too new.",
sub_path,
format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
continue;
}
@ -705,21 +810,19 @@ static int dir_cleanup(
finish:
if (deleted) {
char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX];
usec_t age1, age2;
age1 = timespec_load(&ds->st_atim);
age2 = timespec_load(&ds->st_mtim);
char a[FORMAT_TIMESTAMP_MAX], m[FORMAT_TIMESTAMP_MAX];
struct timespec ts[2];
log_debug("Restoring access and modification time on \"%s\": %s, %s",
p,
format_timestamp_style(a, sizeof(a), age1, TIMESTAMP_US),
format_timestamp_style(b, sizeof(b), age2, TIMESTAMP_US));
format_timestamp_style(a, sizeof(a), self_atime_nsec / NSEC_PER_USEC, TIMESTAMP_US),
format_timestamp_style(m, sizeof(m), self_mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
timespec_store_nsec(ts + 0, self_atime_nsec);
timespec_store_nsec(ts + 1, self_mtime_nsec);
/* Restore original directory timestamps */
if (futimens(dirfd(d), (struct timespec[]) {
ds->st_atim,
ds->st_mtim }) < 0)
if (futimens(dirfd(d), ts) < 0)
log_warning_errno(errno, "Failed to revert timestamps of '%s', ignoring: %m", p);
}
@ -2186,11 +2289,20 @@ static int remove_item(Item *i) {
}
static int clean_item_instance(Item *i, const char* instance) {
_cleanup_closedir_ DIR *d = NULL;
struct stat s, ps;
bool mountpoint;
usec_t cutoff, n;
char timestamp[FORMAT_TIMESTAMP_MAX];
_cleanup_closedir_ DIR *d = NULL;
uint32_t dev_major, dev_minor;
nsec_t atime_nsec, mtime_nsec;
int mountpoint = -1;
usec_t cutoff, n;
uint64_t ino;
struct statx sx
#if HAS_FEATURE_MEMORY_SANITIZER
= {}
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
#endif
;
assert(i);
@ -2213,24 +2325,53 @@ static int clean_item_instance(Item *i, const char* instance) {
return log_error_errno(errno, "Failed to open directory %s: %m", instance);
}
if (fstat(dirfd(d), &s) < 0)
return log_error_errno(errno, "stat(%s) failed: %m", i->path);
if (statx(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx) < 0) {
struct stat s;
if (!S_ISDIR(s.st_mode))
return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR),
"%s is not a directory.", i->path);
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
return log_error_errno(errno, "statx(%s) failed: %m", i->path);
if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
if (fstat(dirfd(d), &s) < 0)
return log_error_errno(errno, "stat(%s) failed: %m", i->path);
dev_major = major(s.st_dev);
dev_minor = minor(s.st_dev);
ino = s.st_ino;
atime_nsec = timespec_load_nsec(&s.st_atim);
mtime_nsec = timespec_load_nsec(&s.st_mtim);
} else {
if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
dev_major = sx.stx_dev_major;
dev_minor = sx.stx_dev_minor;
ino = sx.stx_ino;
atime_nsec = load_statx_timestamp_nsec(&sx.stx_atime);
mtime_nsec = load_statx_timestamp_nsec(&sx.stx_mtime);
}
if (mountpoint < 0) {
struct stat ps;
if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
mountpoint =
dev_major != major(ps.st_dev) ||
dev_minor != minor(ps.st_dev) ||
ino != ps.st_ino;
}
mountpoint = s.st_dev != ps.st_dev || s.st_ino == ps.st_ino;
log_debug("Cleanup threshold for %s \"%s\" is %s",
mountpoint ? "mount point" : "directory",
instance,
format_timestamp_style(timestamp, sizeof(timestamp), cutoff, TIMESTAMP_US));
return dir_cleanup(i, instance, d, &s, cutoff, s.st_dev, mountpoint,
return dir_cleanup(i, instance, d,
atime_nsec, mtime_nsec, cutoff * NSEC_PER_USEC,
dev_major, dev_minor, mountpoint,
MAX_DEPTH, i->keep_first_level);
}