diff --git a/TODO b/TODO index ed2a93e8db..683f94a5c0 100644 --- a/TODO +++ b/TODO @@ -178,9 +178,6 @@ Features: * systemd-repart: allow config of partition uuid -* systemd-repart: add --make= switch for fallocating a new file of the - specified size first. - * userdb: allow username prefix searches in varlink API, allow realname and realname substr searches in varlink API diff --git a/man/systemd-repart.xml b/man/systemd-repart.xml index f55be4f328..74c6ba95b1 100644 --- a/man/systemd-repart.xml +++ b/man/systemd-repart.xml @@ -58,7 +58,7 @@ The root partition may be grown to cover the whole available disk space - A /home/, swap or /srv partition can be added in + A /home/, swap or /srv/ partition can be added in A second (or third, …) root partition may be added in, to cover A/B style setups where a second version of the root file system is alternatingly used for implementing update schemes. The deployed image would carry only a single partition ("A") but on first boot a second @@ -145,6 +145,12 @@ also be set explicitly, formatted as UUID via the --seed= option. By hashing these UUIDs from a common seed images prepared with this tool become reproducible and the result of the algorithm above deterministic. + + The positional argument should specify the block device to operate on. Instead of a block device + node path a regular file may be specified too, in which case the command operates on it like it would if + a loopback block device node was specified with the file attached. If --empty=create is + specified the specified path is created as a regular file, which is useful for generating disk images from + scratch. @@ -165,9 +171,9 @@ Takes one of refuse, allow, - require or force. Controls how to operate on block devices that - are entirely empty, i.e. carry no partition table/disk label yet. If this switch is not specified the - implied default is refuse. + require, force or create. Controls how to + operate on block devices that are entirely empty, i.e. 
carry no partition table/disk label yet. If + this switch is not specified the implied default is refuse. If refuse systemd-repart requires that the block device it shall operate on already carries a partition table and refuses operation if none is found. If @@ -176,7 +182,9 @@ exists so far, and refuse operation if one already exists. If force it will create a fresh partition table unconditionally, erasing the disk fully in effect. If force no existing partitions will be taken into account or survive the - operation. Hence: use with care, this is a great way to lose all your data. + operation. Hence: use with care, this is a great way to lose all your data. If + create a new loopback file is created under the path passed via the device node + parameter, of the size indicated with --size=, see below. @@ -186,7 +194,20 @@ the implied default. Controls whether to issue the BLKDISCARD I/O control command on the space taken up by any added partitions or on the space in between them. Usually, it's a good idea to issue this request since it tells the underlying hardware that the covered blocks - shall be considered empty, improving performance. + shall be considered empty, improving performance. If operating on a regular file instead of a block + device node, a sparse file is generated. + + + + + + Takes a size in bytes, using the usual K, M, G, T suffixes. If used the specified + device node path must refer to a regular file, which is then grown to the specified size if smaller, + before any change is made to the partition table. This is not supported if the specified node is a + block device. This switch has no effect if the file is already as large as the specified size or + larger. The specified size is implicitly rounded up to multiples of 4096. When used with + --empty=create this specifies the initial size of the loopback file to + create. 
diff --git a/src/partition/repart.c b/src/partition/repart.c index 82d60cd42b..4c738920ab 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -58,6 +58,7 @@ static enum { EMPTY_ALLOW, /* allow empty disks, create partition table if necessary */ EMPTY_REQUIRE, /* require an empty disk, create a partition table */ EMPTY_FORCE, /* make disk empty, erase everything, create a partition table always */ + EMPTY_CREATE, /* create disk as loopback file, create a partition table always */ } arg_empty = EMPTY_REFUSE; static bool arg_dry_run = true; @@ -70,6 +71,7 @@ static int arg_factory_reset = -1; static sd_id128_t arg_seed = SD_ID128_NULL; static bool arg_randomize = false; static int arg_pretty = -1; +static uint64_t arg_size = UINT64_MAX; STATIC_DESTRUCTOR_REGISTER(arg_root, freep); STATIC_DESTRUCTOR_REGISTER(arg_definitions, freep); @@ -1166,7 +1168,11 @@ static int disk_acquire_uuid(Context *context, sd_id128_t *ret) { return 0; } -static int context_load_partition_table(Context *context, const char *node) { +static int context_load_partition_table( + Context *context, + const char *node, + int *backing_fd) { + _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL; uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors; @@ -1178,14 +1184,31 @@ static int context_load_partition_table(Context *context, const char *node) { assert(context); assert(node); + assert(backing_fd); c = fdisk_new_context(); if (!c) return log_oom(); - r = fdisk_assign_device(c, node, arg_dry_run); + /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the + * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. 
*/ + if (*backing_fd < 0) + r = fdisk_assign_device(c, node, arg_dry_run); + else { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + xsprintf(procfs_path, "/proc/self/fd/%i", *backing_fd); + + r = fdisk_assign_device(c, procfs_path, arg_dry_run); + } if (r < 0) - return log_error_errno(r, "Failed to open device: %m"); + return log_error_errno(r, "Failed to open device '%s': %m", node); + + if (*backing_fd < 0) { + /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */ + *backing_fd = fcntl(fdisk_get_devfd(c), F_DUPFD_CLOEXEC, 3); + if (*backing_fd < 0) + return log_error_errno(errno, "Failed to duplicate fdisk fd: %m"); + } /* Tell udev not to interfere while we are processing the device */ if (flock(fdisk_get_devfd(c), arg_dry_run ? LOCK_SH : LOCK_EX) < 0) @@ -1225,6 +1248,7 @@ static int context_load_partition_table(Context *context, const char *node) { break; case EMPTY_FORCE: + case EMPTY_CREATE: /* Always reinitiaize the disk, don't consider what there was on the disk before */ from_scratch = true; break; @@ -1900,8 +1924,7 @@ static int context_discard_range(Context *context, uint64_t offset, uint64_t siz if (size <= 0) return 0; - fd = fdisk_get_devfd(context->fdisk_context); - assert(fd >= 0); + assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0); if (fstat(fd, &st) < 0) return -errno; @@ -2256,7 +2279,7 @@ static int device_kernel_partitions_supported(int fd) { if (fstat(fd, &st) < 0) return log_error_errno(fd, "Failed to fstat() image file: %m"); if (!S_ISBLK(st.st_mode)) - return false; + return -ENOTBLK; /* we do not log in this one special case about errors */ if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) { @@ -2461,9 +2484,11 @@ static int context_write_partition_table( return log_error_errno(r, "Failed to write partition table: %m"); capable = device_kernel_partitions_supported(fdisk_get_devfd(context->fdisk_context)); - if (capable < 0) + if (capable == -ENOTBLK) + 
log_debug("Not telling kernel to reread partition table, since we are not operating on a block device."); + else if (capable < 0) return capable; - if (capable > 0) { + else if (capable > 0) { log_info("Telling kernel to reread partition table."); if (from_scratch) @@ -2588,8 +2613,8 @@ static int help(void) { " -h --help Show this help\n" " --version Show package version\n" " --dry-run=BOOL Whether to run dry-run operation\n" - " --empty=MODE One of refuse, allow, require, force; controls how to\n" - " handle empty disks lacking partition table\n" + " --empty=MODE One of refuse, allow, require, force, create; controls\n" + " how to handle empty disks lacking partition tables\n" " --discard=BOOL Whether to discard backing blocks for new partitions\n" " --pretty=BOOL Whether to show pretty summary before executing operation\n" " --factory-reset=BOOL Whether to remove data partitions before recreating\n" @@ -2598,6 +2623,7 @@ static int help(void) { " --root=PATH Operate relative to root path\n" " --definitions=DIR Find partitions in specified directory\n" " --seed=UUID 128bit seed UUID to derive all UUIDs from\n" + " --size=BYTES Grow loopback file to specified size\n" "\nSee the %s for details.\n" , program_invocation_short_name , ansi_highlight(), ansi_normal() @@ -2620,6 +2646,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_SEED, ARG_PRETTY, ARG_DEFINITIONS, + ARG_SIZE, }; static const struct option options[] = { @@ -2634,10 +2661,11 @@ static int parse_argv(int argc, char *argv[]) { { "seed", required_argument, NULL, ARG_SEED }, { "pretty", required_argument, NULL, ARG_PRETTY }, { "definitions", required_argument, NULL, ARG_DEFINITIONS }, + { "size", required_argument, NULL, ARG_SIZE }, {} }; - int c, r; + int c, r, dry_run = -1; assert(argc >= 0); assert(argv); @@ -2657,7 +2685,7 @@ static int parse_argv(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Failed to parse --dry-run= parameter: %s", optarg); - arg_dry_run = r; + dry_run = r; 
break; case ARG_EMPTY: @@ -2669,7 +2697,14 @@ static int parse_argv(int argc, char *argv[]) { arg_empty = EMPTY_REQUIRE; else if (streq(optarg, "force")) arg_empty = EMPTY_FORCE; - else + else if (streq(optarg, "create")) { + arg_empty = EMPTY_CREATE; + + if (dry_run < 0) + dry_run = false; /* Imply --dry-run=no if we create the loopback file + * anew. After all we cannot really break anyone's + * partition tables that way. */ + } else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse --empty= parameter: %s", optarg); break; @@ -2730,6 +2765,27 @@ static int parse_argv(int argc, char *argv[]) { return r; break; + case ARG_SIZE: { + uint64_t parsed, rounded; + + r = parse_size(optarg, 1024, &parsed); + if (r < 0) + return log_error_errno(r, "Failed to parse --size= parameter: %s", optarg); + + rounded = round_up_size(parsed, 4096); + if (rounded == 0) + return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too small, refusing."); + if (rounded == UINT64_MAX) + return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too large, refusing."); + + if (rounded != parsed) + log_warning("Specified size is not a multiple of 4096, rounding up automatically. 
(%" PRIu64 " → %" PRIu64 ")", + parsed, rounded); + + arg_size = rounded; + break; + } + case '?': return -EINVAL; @@ -2741,14 +2797,27 @@ static int parse_argv(int argc, char *argv[]) { return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected at most one argument, the path to the block device."); - if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE)) + if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Combination of --factory-reset=yes and --empty=force/--empty=require is invalid."); + "Combination of --factory-reset=yes and --empty=force/--empty=require/--empty=create is invalid."); if (arg_can_factory_reset) - arg_dry_run = true; + arg_dry_run = true; /* When --can-factory-reset is specified we don't make changes, hence + * non-dry-run mode makes no sense. Thus, imply dry run mode so that we + * open things strictly read-only. */ + else if (dry_run >= 0) + arg_dry_run = dry_run; + + if (arg_empty == EMPTY_CREATE && arg_size == UINT64_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "If --empty=create is specified, --size= must be specified, too."); arg_node = argc > optind ? 
argv[optind] : NULL; + + if (IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE) && !arg_node) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "A path to a device node or loopback file must be specified when --empty=force, --empty=require or --empty=create are used."); + return 1; } @@ -2815,12 +2884,16 @@ static int remove_efi_variable_factory_reset(void) { return 0; } -static int acquire_root_devno(const char *p, int mode, char **ret) { +static int acquire_root_devno(const char *p, int mode, char **ret, int *ret_fd) { _cleanup_close_ int fd = -1; struct stat st; - dev_t devno; + dev_t devno, fd_devno = (mode_t) -1; int r; + assert(p); + assert(ret); + assert(ret_fd); + fd = open(p, mode); if (fd < 0) return -errno; @@ -2836,23 +2909,23 @@ static int acquire_root_devno(const char *p, int mode, char **ret) { return log_oom(); *ret = s; + *ret_fd = TAKE_FD(fd); + return 0; } if (S_ISBLK(st.st_mode)) - devno = st.st_rdev; + fd_devno = devno = st.st_rdev; else if (S_ISDIR(st.st_mode)) { devno = st.st_dev; - - if (major(st.st_dev) == 0) { + if (major(devno) == 0) { r = btrfs_get_block_device_fd(fd, &devno); if (r == -ENOTTY) /* not btrfs */ return -ENODEV; if (r < 0) return r; } - } else return -ENOTBLK; @@ -2866,21 +2939,50 @@ static int acquire_root_devno(const char *p, int mode, char **ret) { if (r < 0) log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p); - return device_path_make_canonical(S_IFBLK, devno, ret); + r = device_path_make_canonical(S_IFBLK, devno, ret); + if (r < 0) + return log_debug_errno(r, "Failed to determine canonical path for '%s': %m", p); + + /* Only if we still lock at the same block device we can reuse the fd. Otherwise return an + * invalidated fd. */ + *ret_fd = fd_devno != (mode_t) -1 && fd_devno == devno ? 
TAKE_FD(fd) : -1; + return 0; } -static int find_root(char **ret) { +static int find_root(char **ret, int *ret_fd) { const char *t; int r; + assert(ret); + assert(ret_fd); + if (arg_node) { - r = acquire_root_devno(arg_node, O_RDONLY|O_CLOEXEC, ret); + if (arg_empty == EMPTY_CREATE) { + _cleanup_close_ int fd = -1; + _cleanup_free_ char *s = NULL; + + s = strdup(arg_node); + if (!s) + return log_oom(); + + fd = open(arg_node, O_RDONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOFOLLOW, 0777); + if (fd < 0) + return log_error_errno(errno, "Failed to create '%s': %m", arg_node); + + *ret = TAKE_PTR(s); + *ret_fd = TAKE_FD(fd); + return 0; + } + + r = acquire_root_devno(arg_node, O_RDONLY|O_CLOEXEC, ret, ret_fd); if (r < 0) return log_error_errno(r, "Failed to determine backing device of %s: %m", arg_node); return 0; } + assert(IN_SET(arg_empty, EMPTY_REFUSE, EMPTY_ALLOW)); + /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. The * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device * (think: volatile setups) */ @@ -2898,7 +3000,7 @@ static int find_root(char **ret) { } else p = t; - r = acquire_root_devno(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC, ret); + r = acquire_root_devno(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC, ret, ret_fd); if (r < 0) { if (r != -ENODEV) return log_error_errno(r, "Failed to determine backing device of %s: %m", p); @@ -2909,9 +3011,83 @@ static int find_root(char **ret) { return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device."); } +static int resize_backing_fd(const char *node, int *fd) { + char buf1[FORMAT_BYTES_MAX], buf2[FORMAT_BYTES_MAX]; + _cleanup_close_ int writable_fd = -1; + struct stat st; + int r; + + assert(node); + assert(fd); + + if (arg_size == UINT64_MAX) /* Nothing to do */ + return 0; + + if (*fd < 0) { + /* Open the file if we haven't opened it yet. 
Note that we open it read-only here, just to + * keep a reference to the file we can pass around. */ + *fd = open(node, O_RDONLY|O_CLOEXEC); + if (*fd < 0) + return log_error_errno(errno, "Failed to open '%s' in order to adjust size: %m", node); + } + + if (fstat(*fd, &st) < 0) + return log_error_errno(errno, "Failed to stat '%s': %m", node); + + r = stat_verify_regular(&st); + if (r < 0) + return log_error_errno(r, "Specified path '%s' is not a regular file, cannot resize: %m", node); + + assert_se(format_bytes(buf1, sizeof(buf1), st.st_size)); + assert_se(format_bytes(buf2, sizeof(buf2), arg_size)); + + if ((uint64_t) st.st_size >= arg_size) { + log_info("File '%s' already is of requested size or larger, not growing. (%s >= %s)", node, buf1, buf2); + return 0; + } + + /* The file descriptor is read-only. In order to grow the file we need to have a writable fd. We + * reopen the file for that temporarily. We keep the writable fd only open for this operation though, + * as fdisk can't accept it anyway. */ + + writable_fd = fd_reopen(*fd, O_WRONLY|O_CLOEXEC); + if (writable_fd < 0) + return log_error_errno(writable_fd, "Failed to reopen backing file '%s' writable: %m", node); + + if (!arg_discard) { + if (fallocate(writable_fd, 0, 0, arg_size) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno)) + return log_error_errno(errno, "Failed to grow '%s' from %s to %s by allocation: %m", + node, buf1, buf2); + + /* Fallback to truncation, if fallocate() is not supported. 
*/ + log_debug("Backing file system does not support fallocate(), falling back to ftruncate()."); + } else { + if (st.st_size == 0) /* Likely regular file just created by us */ + log_info("Allocated %s for '%s'.", buf2, node); + else + log_info("File '%s' grown from %s to %s by allocation.", node, buf1, buf2); + + return 1; + } + } + + if (ftruncate(writable_fd, arg_size) < 0) + return log_error_errno(errno, "Failed to grow '%s' from %s to %s by truncation: %m", + node, buf1, buf2); + + if (st.st_size == 0) /* Likely regular file just created by us */ + log_info("Sized '%s' to %s.", node, buf2); + else + log_info("File '%s' grown from %s to %s by truncation.", node, buf1, buf2); + + return 1; +} + static int run(int argc, char *argv[]) { _cleanup_(context_freep) Context* context = NULL; _cleanup_free_ char *node = NULL; + _cleanup_close_ int backing_fd = -1; bool from_scratch; int r; @@ -2946,14 +3122,22 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - if (context->n_partitions <= 0 && arg_empty != EMPTY_FORCE) + if (context->n_partitions <= 0 && arg_empty == EMPTY_REFUSE) { + log_info("Didn't find any partition definition files, nothing to do."); return 0; + } - r = find_root(&node); + r = find_root(&node, &backing_fd); if (r < 0) return r; - r = context_load_partition_table(context, node); + if (arg_size != UINT64_MAX) { + r = resize_backing_fd(node, &backing_fd); + if (r < 0) + return r; + } + + r = context_load_partition_table(context, node, &backing_fd); if (r == -EHWPOISON) return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't * really an error when called at boot. 
*/ @@ -2982,7 +3166,7 @@ static int run(int argc, char *argv[]) { /* Reload the reduced partition table */ context_unload_partition_table(context); - r = context_load_partition_table(context, node); + r = context_load_partition_table(context, node, &backing_fd); if (r < 0) return r; } diff --git a/src/partition/test-repart.sh b/src/partition/test-repart.sh index 7da6b1b027..58fcc797c4 100755 --- a/src/partition/test-repart.sh +++ b/src/partition/test-repart.sh @@ -8,11 +8,9 @@ D=$(mktemp --directory) trap "rm -rf '$D'" EXIT INT QUIT PIPE mkdir -p $D/definitions -truncate -s 1G $D/zzz - SEED=e2a40bf9-73f1-4278-9160-49c031e7aef8 -$repart $D/zzz --empty=force --dry-run=no --seed=$SEED +$repart $D/zzz --empty=create --size=1G --seed=$SEED sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' > $D/empty @@ -90,9 +88,7 @@ $D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-09 $D/zzz5 : start= 1908696, size= 188416, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=03477476-06AD-44E8-9EF4-BC2BD7771289, name="linux-generic" EOF -truncate -s 2G $D/zzz - -$repart $D/zzz --dry-run=no --seed=$SEED --definitions=$D/definitions +$repart $D/zzz --size=2G --dry-run=no --seed=$SEED --definitions=$D/definitions sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated3