xattr-util: use crtime/btime if statx() is available for implementation of fd_setcrtime() and friends

The Linux kernel exposes the birth time now for files through statx()
hence make use of it where available. We keep the xattr logic in place
for this however, since only a subset of file systems on Linux currently
expose the birth time. NFS and tmpfs for example do not support it. OTOH
there are other file systems that do support the birth time but might
not support xattrs (smb…), hence make the best of the two, in particular
in order to deal with journal files copied between file system types and
to maintain compatibility with older file systems that are updated to
a newer version of the file system.
This commit is contained in:
Lennart Poettering 2018-02-20 12:48:33 +01:00
parent 1133dea477
commit 4c2e1b399f
6 changed files with 161 additions and 45 deletions

View File

@ -449,6 +449,8 @@ decl_headers = '''
#include <uchar.h>
#include <linux/ethtool.h>
#include <linux/fib_rules.h>
#include <linux/stat.h>
#include <sys/stat.h>
'''
# FIXME: key_serial_t is only defined in keyutils.h, this is bound to fail
@ -457,6 +459,7 @@ foreach decl : ['char16_t',
'key_serial_t',
'struct ethtool_link_settings',
'struct fib_rule_uid_range',
'struct statx',
]
# We get -1 if the size cannot be determined
@ -519,6 +522,9 @@ foreach ident : [
['bpf', '''#include <sys/syscall.h>
#include <unistd.h>'''],
['explicit_bzero' , '''#include <string.h>'''],
['statx', '''#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>'''],
]
have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE')

View File

@ -34,10 +34,12 @@
#include <linux/neighbour.h>
#include <linux/oom.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/ethernet.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <uchar.h>
#include <unistd.h>
@ -1372,4 +1374,43 @@ struct fib_rule_uid_range {
#define PF_KTHREAD 0x00200000
#endif
/* Fallback definitions for builds where the toolchain headers do not declare "struct statx"
 * (HAVE_STRUCT_STATX is false). NOTE(review): the field order and sizes are presumably meant to mirror the
 * kernel's definition in <linux/stat.h>, since the structure is filled in by the statx() system call —
 * verify against the kernel header before changing anything here. */
#if ! HAVE_STRUCT_STATX
/* A single timestamp as used by struct statx: seconds plus a nanosecond part. */
struct statx_timestamp {
int64_t tv_sec;
uint32_t tv_nsec;
uint32_t __reserved;
};
struct statx {
/* Bit mask of fields; callers test it (e.g. against STATX_BTIME) before trusting the matching field */
uint32_t stx_mask;
uint32_t stx_blksize;
uint64_t stx_attributes;
uint32_t stx_nlink;
uint32_t stx_uid;
uint32_t stx_gid;
uint16_t stx_mode;
uint16_t __spare0[1];
uint64_t stx_ino;
uint64_t stx_size;
uint64_t stx_blocks;
uint64_t stx_attributes_mask;
struct statx_timestamp stx_atime;
/* Birth/creation time; only meaningful if STATX_BTIME is set in stx_mask */
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
uint32_t stx_rdev_major;
uint32_t stx_rdev_minor;
uint32_t stx_dev_major;
uint32_t stx_dev_minor;
uint64_t __spare2[14];
};
#endif
/* Fallback for headers that lack it: the statx() mask bit that requests the birth time and that is checked
 * in stx_mask to see whether stx_btime was filled in. */
#ifndef STATX_BTIME
#define STATX_BTIME 0x00000800U
#endif
/* Fallback for headers that lack it: flag OR'ed into the statx() flags argument. Per statx(2) it asks the
 * kernel not to synchronize with a remote file system server before returning cached attributes. */
#ifndef AT_STATX_DONT_SYNC
#define AT_STATX_DONT_SYNC 0x4000
#endif
#include "missing_syscall.h"

View File

@ -383,3 +383,26 @@ static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
# endif
# endif
#endif
#if !HAVE_STATX
/* Provide the system call number ourselves if the kernel headers are too old to know statx(). The macro
 * defined here must be __NR_statx: defining any other name would leave __NR_statx unset (so the wrapper
 * below would always fail with ENOSYS) and could clash with that other system call's real number. */
#  ifndef __NR_statx
#    if defined __i386__
#      define __NR_statx 383
#    elif defined __x86_64__
#      define __NR_statx 332
#    else
#      warning "__NR_statx not defined for your architecture"
#    endif
#  endif

struct statx;

/* Minimal replacement for the libc statx() wrapper, used when libc does not provide one.
 *
 * dfd/filename/flags select the file, mask selects the fields to query and buffer receives the result;
 * all arguments are passed to the system call unmodified.
 *
 * Returns the system call's return value. If no system call number is known for the architecture, fails
 * with -1 and errno set to ENOSYS. */
static inline ssize_t statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) {
#  ifdef __NR_statx
        return syscall(__NR_statx, dfd, filename, flags, mask, buffer);
#  else
        errno = ENOSYS;
        return -1;
#  endif
}
#endif

View File

@ -20,6 +20,7 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/stat.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@ -29,6 +30,7 @@
#include "alloc-util.h"
#include "fd-util.h"
#include "macro.h"
#include "missing.h"
#include "sparse-endian.h"
#include "stdio-util.h"
#include "string-util.h"
@ -148,52 +150,66 @@ static int parse_crtime(le64_t le, usec_t *usec) {
return 0;
}
int fd_getcrtime(int fd, usec_t *usec) {
int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) {
struct statx sx;
usec_t a, b;
le64_t le;
ssize_t n;
int r;
assert(fd >= 0);
assert(usec);
assert(ret);
/* Until Linux gets a real concept of birthtime/creation time,
* let's fake one with xattrs */
if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW))
return -EINVAL;
n = fgetxattr(fd, "user.crtime_usec", &le, sizeof(le));
if (n < 0)
return -errno;
if (n != sizeof(le))
return -EIO;
/* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
* on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
* implemented on various file systems on the lower level for a while, but was never accessible). However, we
* needed a concept like that for vacuuming algorithms and such, hence we emulated it via a user xattr for a
* long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first
* time, where it is available. This function will read it, but it tries to keep some compatibility with older
* systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the
* concept is useful for determining how "old" a file really is, and hence using the older of the two makes
* most sense. */
return parse_crtime(le, usec);
}
int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags) {
le64_t le;
ssize_t n;
if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 &&
(sx.stx_mask & STATX_BTIME) &&
sx.stx_btime.tv_sec != 0)
a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC +
(usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC;
else
a = USEC_INFINITY;
n = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags);
if (n < 0)
return -errno;
if (n != sizeof(le))
return -EIO;
r = -errno;
else if (n != sizeof(le))
r = -EIO;
else
r = parse_crtime(le, &b);
if (r < 0) {
if (a != USEC_INFINITY) {
*ret = a;
return 0;
}
return parse_crtime(le, usec);
return r;
}
if (a != USEC_INFINITY)
*ret = MIN(a, b);
else
*ret = b;
return 0;
}
int path_getcrtime(const char *p, usec_t *usec) {
le64_t le;
ssize_t n;
/* Determines the creation time of the file referred to by the open file descriptor fd. Thin wrapper that
 * calls fd_getcrtime_at() with no name and AT_EMPTY_PATH; the result is stored in *ret and the return
 * value of fd_getcrtime_at() is passed through unchanged. */
int fd_getcrtime(int fd, usec_t *ret) {
return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH);
}
assert(p);
assert(usec);
n = getxattr(p, "user.crtime_usec", &le, sizeof(le));
if (n < 0)
return -errno;
if (n != sizeof(le))
return -EIO;
return parse_crtime(le, usec);
/* Determines the creation time of the file at path p. Thin wrapper that calls fd_getcrtime_at() with
 * AT_FDCWD as directory fd and no flags (so AT_SYMLINK_NOFOLLOW is not set); the result is stored in *ret
 * and the return value of fd_getcrtime_at() is passed through unchanged. */
int path_getcrtime(const char *p, usec_t *ret) {
return fd_getcrtime_at(AT_FDCWD, p, ret, 0);
}
int fd_setcrtime(int fd, usec_t usec) {
@ -201,7 +217,7 @@ int fd_setcrtime(int fd, usec_t usec) {
assert(fd >= 0);
if (usec <= 0)
if (IN_SET(usec, 0, USEC_INFINITY))
usec = now(CLOCK_REALTIME);
le = htole64((uint64_t) usec);

View File

@ -3316,17 +3316,12 @@ int journal_file_open(
(void) journal_file_warn_btrfs(f);
/* Let's attach the creation time to the journal file,
* so that the vacuuming code knows the age of this
* file even if the file might end up corrupted one
* day... Ideally we'd just use the creation time many
* file systems maintain for each file, but there is
* currently no usable API to query this, hence let's
* emulate this via extended attributes. If extended
* attributes are not supported we'll just skip this,
* and rely solely on mtime/atime/ctime of the file. */
fd_setcrtime(f->fd, 0);
/* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this
* file even if the file might end up corrupted one day... Ideally we'd just use the creation time many
* file systems maintain for each file, but the API to query this is very new, hence let's emulate this
* via extended attributes. If extended attributes are not supported we'll just skip this, and rely
* solely on mtime/atime/ctime of the file. */
(void) fd_setcrtime(f->fd, 0);
#if HAVE_GCRYPT
/* Try to load the FSPRG state, and if we can't, then

View File

@ -26,6 +26,7 @@
#include "alloc-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "string-util.h"
@ -63,8 +64,42 @@ cleanup:
assert_se(rmdir(t) >= 0);
}
/* Exercises fd_getcrtime()/fd_setcrtime() on an unlinked temporary file: first logs whatever creation time
 * is reported for the fresh file, then stores a fixed timestamp and checks that it is what gets read back. */
static void test_getcrtime(void) {
        char formatted[FORMAT_TIMESTAMP_MAX];
        _cleanup_close_ int fd = -1;
        usec_t usec, k;
        const char *vt;
        int q;

        assert_se(tmp_dir(&vt) >= 0);

        fd = open_tmpfile_unlinkable(vt, O_RDWR);
        assert_se(fd >= 0);

        /* Initial query: failure is acceptable here, we only log the outcome either way */
        q = fd_getcrtime(fd, &usec);
        if (q >= 0)
                log_debug("btime: %s", format_timestamp(formatted, sizeof formatted, usec));
        else
                log_debug_errno(q, "btime: %m");

        /* Remember the current wall clock time before the timestamp is set */
        k = now(CLOCK_REALTIME);

        q = fd_setcrtime(fd, 1519126446UL * USEC_PER_SEC);
        if (IN_SET(q, -EOPNOTSUPP, -ENOTTY))
                return; /* Setting was refused with one of the tolerated errors, nothing to verify */

        assert_se(fd_getcrtime(fd, &usec) >= 0);

        /* The stored timestamp must be read back, unless the clock reads earlier than that timestamp */
        assert_se(k < 1519126446UL * USEC_PER_SEC ||
                  usec == 1519126446UL * USEC_PER_SEC);
}
/* Test program entry point: sets up logging, then runs the test cases in order. Returns 0 if all of them
 * ran through. */
int main(void) {
/* Raise the maximum log level to debug before the remaining log setup calls, so that the log_debug()
 * output of the tests can be shown */
log_set_max_level(LOG_DEBUG);
log_parse_environment();
log_open();
test_fgetxattrat_fake();
test_getcrtime();
return 0;
}