From 43767d9d5e0ce8923828aebf9154da7af83916f7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 09:50:31 +0100 Subject: [PATCH] fs-util: add new unlinkat_deallocate() helper This new helper not only removes a file from a directory but also ensures its space on disk is deallocated, by either punching a hole over the full file or truncating the file afterwards if the file's link counter is 0. This is useful in "vacuuming" algorithms to ensure that client's can't keep the disk space the vacuuming is supposed to recover pinned simply by keeping an fd open to it. --- src/basic/fs-util.c | 69 +++++++++++++++++++++++++++++++++++++++++ src/basic/fs-util.h | 2 ++ src/basic/missing.h | 9 ++++++ src/test/test-fs-util.c | 26 ++++++++++++++++ 4 files changed, 106 insertions(+) diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index cf856ae8e2..c96c7d0d25 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -887,3 +887,72 @@ int access_fd(int fd, int mode) { return r; } + +int unlinkat_deallocate(int fd, const char *name, int flags) { + _cleanup_close_ int truncate_fd = -1; + struct stat st; + off_t l, bs; + + /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other + * link to it. This is useful to ensure that other processes that might have the file open for reading won't be + * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up + * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and + * returned to the free pool. + * + * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means + * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other + * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes + * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.) + * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file + * truncation (đŸ”Ē), as our goal of deallocating the data space trumps our goal of being nice to readers (💐). + * + * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the + * primary job – to delete the file – is accomplished. */ + + if ((flags & AT_REMOVEDIR) == 0) { + truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK); + if (truncate_fd < 0) { + + /* If this failed because the file doesn't exist propagate the error right-away. Also, + * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is + * returned when this is a directory but we are not supposed to delete those, hence propagate + * the error right-away too. */ + if (IN_SET(errno, ENOENT, EISDIR)) + return -errno; + + if (errno != ELOOP) /* don't complain if this is a symlink */ + log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name); + } + } + + if (unlinkat(fd, name, flags) < 0) + return -errno; + + if (truncate_fd < 0) /* Don't have a file handle, can't do more ☚ī¸ */ + return 0; + + if (fstat(truncate_fd, &st) < 0) { + log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring.", name); + return 0; + } + + if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0) + return 0; + + /* If this is a regular file, it actually took up space on disk and there are no other links it's time to + * punch-hole/truncate this to release the disk space. */ + + bs = MAX(st.st_blksize, 512); + l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */ + + if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0) + return 0; /* Successfully punched a hole! 😊 */ + + /* Fall back to truncation */ + if (ftruncate(truncate_fd, 0) < 0) { + log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m"); + return 0; + } + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 6df3ebffe2..ae40d6d37f 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -103,3 +103,5 @@ static inline void unlink_and_free(char *p) { DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); int access_fd(int fd, int mode); + +int unlinkat_deallocate(int fd, const char *name, int flags); diff --git a/src/basic/missing.h b/src/basic/missing.h index 327e6ea67f..ed884dddad 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1359,4 +1360,12 @@ struct fib_rule_uid_range { #define NS_GET_NSTYPE _IO(0xb7, 0x3) #endif +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif + +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 +#endif + #include "missing_syscall.h" diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index 9f3a500080..184a2a52c2 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -527,6 +527,31 @@ static void test_touch_file(void) { assert_se(timespec_load(&st.st_mtim) == test_mtime); } +static void test_unlinkat_deallocate(void) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + struct stat st; + + assert_se(tempfn_random_child(NULL, "unlink-deallocation", &p) >= 0); + + fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600); + assert_se(fd >= 0); + + assert_se(write(fd, "hallo\n", 6) == 6); + + assert_se(fstat(fd, &st) >= 0); + assert_se(st.st_size == 6); + assert_se(st.st_blocks > 0); + assert_se(st.st_nlink == 1); + + assert_se(unlinkat_deallocate(AT_FDCWD, p, 0) >= 0); + + assert_se(fstat(fd, &st) >= 0); + assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6 (it worked) or 0 (we had to resort to truncation) */ + assert_se(st.st_blocks == 0); + assert_se(st.st_nlink == 0); +} + int main(int argc, char *argv[]) { test_unlink_noerrno(); test_get_files_in_directory(); @@ -536,6 +561,7 @@ int main(int argc, char *argv[]) { test_dot_or_dot_dot(); test_access_fd(); test_touch_file(); + test_unlinkat_deallocate(); return 0; }