fs-util: add new unlinkat_deallocate() helper

This new helper not only removes a file from a directory but also
ensures its space on disk is deallocated, by either punching a hole over
the full file or, failing that, truncating it, provided the file's link
counter dropped to 0 after the unlink. This is useful in "vacuuming"
algorithms to ensure that clients can't keep the disk space the vacuuming
is supposed to recover pinned simply by keeping an fd open to the file.
This commit is contained in:
Lennart Poettering 2018-02-09 09:50:31 +01:00
parent 7b938dfb8d
commit 43767d9d5e
4 changed files with 106 additions and 0 deletions

View File

@ -887,3 +887,72 @@ int access_fd(int fd, int mode) {
return r;
}
int unlinkat_deallocate(int fd, const char *name, int flags) {
        _cleanup_close_ int truncate_fd = -1;
        struct stat st;
        off_t l, bs;

        /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no
         * other link to it. This is useful to ensure that other processes that might have the file open for
         * reading won't be able to keep the data pinned on disk forever. This call is particularly useful
         * whenever we execute clean-up jobs ("vacuuming"), where we want to make sure the data is really gone and
         * the disk space released and returned to the free pool.
         *
         * Deallocation is preferably done with FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE if supported, which means
         * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
         * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
         * underneath those programs is the better choice than triggering SIGBUS in them, which truncation does.)
         * However, if hole punching fails we fall back to normal file truncation, as our goal of deallocating the
         * data space trumps our goal of being nice to readers.
         *
         * Parameters are the same as for unlinkat(): a directory fd (or AT_FDCWD), a path relative to it, and
         * AT_* flags.
         *
         * Returns 0 if the file was unlinked, or a negative errno-style error if opening failed with
         * ENOENT/EISDIR or if unlinkat() itself failed. Deallocation is only attempted on a best-effort basis:
         * failure there is logged at debug level but never reported to the caller, as long as the primary job of
         * deleting the file is accomplished. */

        if ((flags & AT_REMOVEDIR) == 0) {
                /* Grab a writable fd *before* unlinking, so that we still have a handle on the inode once its
                 * name is gone. */
                truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                if (truncate_fd < 0) {

                        /* If this failed because the file doesn't exist propagate the error right-away. Also,
                         * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR
                         * is returned when this is a directory but we are not supposed to delete those, hence
                         * propagate the error right-away too. */
                        if (IN_SET(errno, ENOENT, EISDIR))
                                return -errno;

                        if (errno != ELOOP) /* don't complain if this is a symlink */
                                log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
                }
        }

        if (unlinkat(fd, name, flags) < 0)
                return -errno;

        if (truncate_fd < 0) /* Don't have a file handle, can't do more */
                return 0;

        if (fstat(truncate_fd, &st) < 0) {
                /* Include %m so that the reason for the failure actually shows up in the log. */
                log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
                return 0;
        }

        /* Nothing to release if this isn't a regular file, if it occupies no blocks, or if other hard links
         * still reference the inode. */
        if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
                return 0;

        /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
         * punch-hole/truncate this to release the disk space. */

        bs = MAX(st.st_blksize, 512);
        l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */

        if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
                return 0; /* Successfully punched a hole! */

        /* Fall back to truncation. A failure here is not fatal either, hence only log about it. */
        if (ftruncate(truncate_fd, 0) < 0)
                log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");

        return 0;
}

View File

@ -103,3 +103,5 @@ static inline void unlink_and_free(char *p) {
DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free);
int access_fd(int fd, int mode);
/* Like unlinkat(), but additionally makes a best-effort attempt to release the disk space of a regular file
 * whose last link was removed (hole punching, falling back to truncation). Returns 0 on success or a negative
 * errno-style error; a failure to deallocate is not reported as an error. */
int unlinkat_deallocate(int fd, const char *name, int flags);

View File

@ -27,6 +27,7 @@
#include <inttypes.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/falloc.h>
#include <linux/if_link.h>
#include <linux/input.h>
#include <linux/loop.h>
@ -1359,4 +1360,12 @@ struct fib_rule_uid_range {
#define NS_GET_NSTYPE _IO(0xb7, 0x3)
#endif
/* Fallback definitions of the fallocate() mode flags, for the case where the
 * included headers do not provide them. */
#if !defined(FALLOC_FL_KEEP_SIZE)
#  define FALLOC_FL_KEEP_SIZE 0x01
#endif

#if !defined(FALLOC_FL_PUNCH_HOLE)
#  define FALLOC_FL_PUNCH_HOLE 0x02
#endif
#include "missing_syscall.h"

View File

@ -527,6 +527,31 @@ static void test_touch_file(void) {
assert_se(timespec_load(&st.st_mtim) == test_mtime);
}
/* Checks that unlinkat_deallocate() both removes the last link of a file and releases its data blocks, even
 * while another fd to the file is still held open. */
static void test_unlinkat_deallocate(void) {
_cleanup_free_ char *p = NULL;
_cleanup_close_ int fd = -1;
struct stat st;
/* Pick a fresh file name; presumably a random path in the temporary directory (helper not shown here). */
assert_se(tempfn_random_child(NULL, "unlink-deallocation", &p) >= 0);
/* O_EXCL guarantees we really create a new file rather than reuse an existing one. */
fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600);
assert_se(fd >= 0);
/* Put a few bytes into the file so that there is something to deallocate. */
assert_se(write(fd, "hallo\n", 6) == 6);
/* Baseline: six bytes, at least one block in use, exactly one link. */
assert_se(fstat(fd, &st) >= 0);
assert_se(st.st_size == 6);
assert_se(st.st_blocks > 0);
assert_se(st.st_nlink == 1);
/* Remove the file while we keep our own fd open, i.e. we act as the "client" pinning the inode. */
assert_se(unlinkat_deallocate(AT_FDCWD, p, 0) >= 0);
/* Through the still-open fd we must now see an unlinked inode that occupies no blocks anymore. */
assert_se(fstat(fd, &st) >= 0);
assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6 (it worked) or 0 (we had to resort to truncation) */
assert_se(st.st_blocks == 0);
assert_se(st.st_nlink == 0);
}
int main(int argc, char *argv[]) {
test_unlink_noerrno();
test_get_files_in_directory();
@ -536,6 +561,7 @@ int main(int argc, char *argv[]) {
test_dot_or_dot_dot();
test_access_fd();
test_touch_file();
test_unlinkat_deallocate();
return 0;
}