sleep: Add support for setting a disk offset when hibernating

The Linux kernel is adding support for configuring the offset
into a disk.  This allows swapfiles to be more usable as users
will no longer need to set the offset on their kernel command
line.

Use this API in systemd when hibernating as well.

Signed-off-by: Mario Limonciello <mario.limonciello@dell.com>
This commit is contained in:
Mario Limonciello 2018-03-08 02:41:50 -06:00
parent c75436067f
commit 17c40b3a8f
4 changed files with 220 additions and 16 deletions

View File

@ -20,6 +20,7 @@
***/
#include <errno.h>
#include <linux/fs.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
@ -185,13 +186,10 @@ int can_sleep_disk(char **types) {
#define HIBERNATION_SWAP_THRESHOLD 0.98
static int hibernation_partition_size(size_t *size, size_t *used) {
int find_hibernate_location(char **device, char **type, size_t *size, size_t *used) {
_cleanup_fclose_ FILE *f;
unsigned i;
assert(size);
assert(used);
f = fopen("/proc/swaps", "re");
if (!f) {
log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
@ -203,7 +201,7 @@ static int hibernation_partition_size(size_t *size, size_t *used) {
(void) fscanf(f, "%*s %*s %*s %*s %*s\n");
for (i = 1;; i++) {
_cleanup_free_ char *dev = NULL, *type = NULL;
_cleanup_free_ char *dev_field = NULL, *type_field = NULL;
size_t size_field, used_field;
int k;
@ -213,7 +211,7 @@ static int hibernation_partition_size(size_t *size, size_t *used) {
"%zu " /* swap size */
"%zu " /* used */
"%*i\n", /* priority */
&dev, &type, &size_field, &used_field);
&dev_field, &type_field, &size_field, &used_field);
if (k != 4) {
if (k == EOF)
break;
@ -222,13 +220,18 @@ static int hibernation_partition_size(size_t *size, size_t *used) {
continue;
}
if (streq(type, "partition") && endswith(dev, "\\040(deleted)")) {
log_warning("Ignoring deleted swapfile '%s'.", dev);
if (streq(type_field, "partition") && endswith(dev_field, "\\040(deleted)")) {
log_warning("Ignoring deleted swapfile '%s'.", dev_field);
continue;
}
*size = size_field;
*used = used_field;
if (device)
*device = TAKE_PTR(dev_field);
if (type)
*type = TAKE_PTR(type_field);
if (size)
*size = size_field;
if (used)
*used = used_field;
return 0;
}
@ -245,7 +248,7 @@ static bool enough_memory_for_hibernation(void) {
if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0)
return true;
r = hibernation_partition_size(&size, &used);
r = find_hibernate_location(NULL, NULL, &size, &used);
if (r < 0)
return false;
@ -269,6 +272,95 @@ static bool enough_memory_for_hibernation(void) {
return r;
}
int read_fiemap(int fd, struct fiemap **ret) {
_cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL;
int extents_size;
struct stat statinfo;
uint32_t result_extents = 0;
uint64_t fiemap_start = 0, fiemap_length;
size_t fiemap_size = 1, result_fiemap_size = 1;
if (fstat(fd, &statinfo) < 0)
return log_debug_errno(errno, "Cannot determine file size: %m");
if (!S_ISREG(statinfo.st_mode))
return -ENOTTY;
fiemap_length = statinfo.st_size;
/* zero this out in case we run on a file with no extents */
fiemap = new0(struct fiemap, 1);
if (!fiemap)
return -ENOMEM;
result_fiemap = new(struct fiemap, 1);
if (!result_fiemap)
return -ENOMEM;
/* XFS filesystem has incorrect implementation of fiemap ioctl and
* returns extents for only one block-group at a time, so we need
* to handle it manually, starting the next fiemap call from the end
* of the last extent
*/
while (fiemap_start < fiemap_length) {
*fiemap = (struct fiemap) {
.fm_start = fiemap_start,
.fm_length = fiemap_length,
.fm_flags = FIEMAP_FLAG_SYNC,
};
/* Find out how many extents there are */
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
return log_debug_errno(errno, "Failed to read extents: %m");
/* Nothing to process */
if (fiemap->fm_mapped_extents == 0)
break;
/* Result fiemap has to hold all the extents for the whole file */
extents_size = DIV_ROUND_UP(sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents,
sizeof(struct fiemap));
/* Resize fiemap to allow us to read in the extents */
if (!GREEDY_REALLOC0(fiemap, fiemap_size, extents_size))
return -ENOMEM;
fiemap->fm_extent_count = fiemap->fm_mapped_extents;
fiemap->fm_mapped_extents = 0;
if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
return log_debug_errno(errno, "Failed to read extents: %m");
extents_size = DIV_ROUND_UP(sizeof(struct fiemap_extent) * (result_extents + fiemap->fm_mapped_extents),
sizeof(struct fiemap));
/* Resize result_fiemap to allow us to read in the extents */
if (!GREEDY_REALLOC(result_fiemap, result_fiemap_size,
extents_size))
return -ENOMEM;
memcpy(result_fiemap->fm_extents + result_extents,
fiemap->fm_extents,
sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents);
result_extents += fiemap->fm_mapped_extents;
/* Highly unlikely that it is zero */
if (fiemap->fm_mapped_extents > 0) {
uint32_t i = fiemap->fm_mapped_extents - 1;
fiemap_start = fiemap->fm_extents[i].fe_logical +
fiemap->fm_extents[i].fe_length;
if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
break;
}
}
memcpy(result_fiemap, fiemap, sizeof(struct fiemap));
result_fiemap->fm_mapped_extents = result_extents;
*ret = TAKE_PTR(result_fiemap);
return 0;
}
static bool can_s2h(void) {
int r;

View File

@ -20,9 +20,12 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <linux/fiemap.h>
#include "time-util.h"
int read_fiemap(int fd, struct fiemap **ret);
int parse_sleep_config(const char *verb, char ***modes, char ***states, usec_t *delay);
int find_hibernate_location(char **device, char **type, size_t *size, size_t *used);
int can_sleep(const char *verb);
int can_sleep_disk(char **types);

View File

@ -4,6 +4,7 @@
Copyright 2012 Lennart Poettering
Copyright 2013 Zbigniew Jędrzejewski-Szmek
Copyright 2010-2017 Canonical
Copyright 2018 Dell Inc.
systemd is free software; you can redistribute it and/or modify it
@ -22,6 +23,7 @@
#include <errno.h>
#include <getopt.h>
#include <linux/fiemap.h>
#include <stdio.h>
#include "sd-messages.h"
@ -40,6 +42,67 @@
static char* arg_verb = NULL;
static int write_hibernate_location_info(void) {
_cleanup_free_ char *device = NULL, *type = NULL;
_cleanup_free_ struct fiemap *fiemap = NULL;
char offset_str[DECIMAL_STR_MAX(uint64_t)];
char device_str[DECIMAL_STR_MAX(uint64_t)];
_cleanup_close_ int fd = -1;
struct stat stb;
uint64_t offset;
int r;
r = find_hibernate_location(&device, &type, NULL, NULL);
if (r < 0)
return log_debug_errno(r, "Unable to find hibernation location: %m");
/* if it's a swap partition, we just write the disk to /sys/power/resume */
if (streq(type, "partition"))
return write_string_file("/sys/power/resume", device, 0);
else if (!streq(type, "file"))
return log_debug_errno(EINVAL, "Invalid hibernate type %s: %m",
type);
/* Only available in 4.17+ */
if (access("/sys/power/resume_offset", F_OK) < 0) {
if (errno == ENOENT)
return 0;
return log_debug_errno(errno, "/sys/power/resume_offset unavailable: %m");
}
r = access("/sys/power/resume_offset", W_OK);
if (r < 0)
return log_debug_errno(errno, "/sys/power/resume_offset not writeable: %m");
fd = open(device, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
if (fd < 0)
return log_debug_errno(errno, "Unable to open '%s': %m", device);
r = fstat(fd, &stb);
if (r < 0)
return log_debug_errno(errno, "Unable to stat %s: %m", device);
r = read_fiemap(fd, &fiemap);
if (r < 0)
return log_debug_errno(r, "Unable to read extent map for '%s': %m",
device);
if (fiemap->fm_mapped_extents == 0) {
log_debug("No extents found in '%s'", device);
return -EINVAL;
}
offset = fiemap->fm_extents[0].fe_physical / page_size();
xsprintf(offset_str, "%" PRIu64, offset);
r = write_string_file("/sys/power/resume_offset", offset_str, 0);
if (r < 0)
return log_debug_errno(r, "Failed to write offset '%s': %m",
offset_str);
xsprintf(device_str, "%lx", (unsigned long)stb.st_dev);
r = write_string_file("/sys/power/resume", device_str, 0);
if (r < 0)
return log_debug_errno(r, "Failed to write device '%s': %m",
device_str);
return 0;
}
static int write_mode(char **modes) {
int r = 0;
char **mode;
@ -110,9 +173,14 @@ static int execute(char **modes, char **states) {
return log_error_errno(errno, "Failed to open /sys/power/state: %m");
/* Configure the hibernation mode */
r = write_mode(modes);
if (r < 0)
return r;
if (!strv_isempty(modes)) {
r = write_hibernate_location_info();
if (r < 0)
return log_error_errno(r, "Failed to write hibernation disk offset: %m");
r = write_mode(modes);
if (r < 0)
return r;
}
execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);

View File

@ -18,13 +18,43 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <linux/fiemap.h>
#include <stdio.h>
#include "fd-util.h"
#include "log.h"
#include "sleep-config.h"
#include "strv.h"
#include "util.h"
static int test_fiemap(const char *path) {
_cleanup_free_ struct fiemap *fiemap = NULL;
_cleanup_close_ int fd = -1;
int r;
fd = open(path, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
if (fd < 0)
return log_error_errno(errno, "failed to open %s: %m", path);
r = read_fiemap(fd, &fiemap);
if (r == -ENOTSUP) {
log_info("Skipping test, not supported");
exit(EXIT_TEST_SKIP);
}
if (r < 0)
return log_error_errno(r, "Unable to read extent map for '%s': %m", path);
log_info("extent map information for %s:", path);
log_info("\t start: %llu", fiemap->fm_start);
log_info("\t length: %llu", fiemap->fm_length);
log_info("\t flags: %u", fiemap->fm_flags);
log_info("\t number of mapped extents: %u", fiemap->fm_mapped_extents);
log_info("\t extent count: %u", fiemap->fm_extent_count);
if (fiemap->fm_extent_count > 0)
log_info("\t first extent location: %llu",
fiemap->fm_extents[0].fe_physical / page_size());
return 0;
}
static void test_sleep(void) {
_cleanup_strv_free_ char
**standby = strv_new("standby", NULL),
@ -52,6 +82,8 @@ static void test_sleep(void) {
}
int main(int argc, char* argv[]) {
int i, r = 0, k;
log_parse_environment();
log_open();
@ -60,5 +92,14 @@ int main(int argc, char* argv[]) {
test_sleep();
return 0;
if (argc <= 1)
assert_se(test_fiemap(argv[0]) == 0);
else
for (i = 1; i < argc; i++) {
k = test_fiemap(argv[i]);
if (r == 0)
r = k;
}
return r;
}