diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
index cf856ae8e2..c96c7d0d25 100644
--- a/src/basic/fs-util.c
+++ b/src/basic/fs-util.c
@@ -887,3 +887,72 @@ int access_fd(int fd, int mode) {
 
         return r;
 }
+
+int unlinkat_deallocate(int fd, const char *name, int flags) {
+        _cleanup_close_ int truncate_fd = -1;
+        struct stat st;
+        off_t l, bs;
+
+        /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
+         * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
+         * able to keep the data pinned on disk forever. This call is particularly useful whenever we execute clean-up
+         * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
+         * returned to the free pool.
+         *
+         * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
+         * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
+         * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
+         * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
+         * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
+         * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
+         *
+         * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
+         * primary job – to delete the file – is accomplished. */
+
+        if ((flags & AT_REMOVEDIR) == 0) {
+                truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+                if (truncate_fd < 0) {
+
+                        /* If this failed because the file doesn't exist propagate the error right-away. Also,
+                         * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
+                         * returned when this is a directory but we are not supposed to delete those, hence propagate
+                         * the error right-away too. */
+                        if (IN_SET(errno, ENOENT, EISDIR))
+                                return -errno;
+
+                        if (errno != ELOOP) /* don't complain if this is a symlink */
+                                log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
+                }
+        }
+
+        if (unlinkat(fd, name, flags) < 0)
+                return -errno;
+
+        if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
+                return 0;
+
+        if (fstat(truncate_fd, &st) < 0) {
+                log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
+                return 0;
+        }
+
+        if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
+                return 0;
+
+        /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
+         * punch-hole/truncate this to release the disk space. */
+
+        bs = MAX(st.st_blksize, 512);
+        l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
+
+        if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
+                return 0; /* Successfully punched a hole! 😊 */
+
+        /* Fall back to truncation */
+        if (ftruncate(truncate_fd, 0) < 0) {
+                log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
+                return 0;
+        }
+
+        return 0;
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
index 6df3ebffe2..ae40d6d37f 100644
--- a/src/basic/fs-util.h
+++ b/src/basic/fs-util.h
@@ -103,3 +103,5 @@ static inline void unlink_and_free(char *p) {
 DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free);
 
 int access_fd(int fd, int mode);
+
+int unlinkat_deallocate(int fd, const char *name, int flags);
diff --git a/src/basic/missing.h b/src/basic/missing.h
index 327e6ea67f..ed884dddad 100644
--- a/src/basic/missing.h
+++ b/src/basic/missing.h
@@ -27,6 +27,7 @@
 #include <inttypes.h>
 #include <linux/audit.h>
 #include <linux/capability.h>
+#include <linux/falloc.h>
 #include <linux/if_link.h>
 #include <linux/input.h>
 #include <linux/loop.h>
@@ -1359,4 +1360,12 @@ struct fib_rule_uid_range {
 #define NS_GET_NSTYPE _IO(0xb7, 0x3)
 #endif
 
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01
+#endif
+
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02
+#endif
+
 #include "missing_syscall.h"
diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c
index 9f3a500080..184a2a52c2 100644
--- a/src/test/test-fs-util.c
+++ b/src/test/test-fs-util.c
@@ -527,6 +527,31 @@ static void test_touch_file(void) {
         assert_se(timespec_load(&st.st_mtim) == test_mtime);
 }
 
+static void test_unlinkat_deallocate(void) {
+        _cleanup_free_ char *p = NULL;
+        _cleanup_close_ int fd = -1;
+        struct stat st;
+
+        assert_se(tempfn_random_child(NULL, "unlink-deallocation", &p) >= 0);
+
+        fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600);
+        assert_se(fd >= 0);
+
+        assert_se(write(fd, "hallo\n", 6) == 6);
+
+        assert_se(fstat(fd, &st) >= 0);
+        assert_se(st.st_size == 6);
+        assert_se(st.st_blocks > 0);
+        assert_se(st.st_nlink == 1);
+
+        assert_se(unlinkat_deallocate(AT_FDCWD, p, 0) >= 0);
+
+        assert_se(fstat(fd, &st) >= 0);
+        assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6 (it worked) or 0 (we had to resort to truncation) */
+        assert_se(st.st_blocks == 0);
+        assert_se(st.st_nlink == 0);
+}
+
 int main(int argc, char *argv[]) {
         test_unlink_noerrno();
         test_get_files_in_directory();
@@ -536,6 +561,7 @@ int main(int argc, char *argv[]) {
         test_dot_or_dot_dot();
         test_access_fd();
         test_touch_file();
+        test_unlinkat_deallocate();
 
         return 0;
 }