From 866fdcceb484e4b3bffcbf5ab0ea490f06d98e9c Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 7 Jul 2020 17:10:47 +0100 Subject: [PATCH 1/4] test: add another test case for extract_many_words Covers some functionality that we want to use for config tuples --- src/test/test-extract-word.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/test/test-extract-word.c b/src/test/test-extract-word.c index 4ca57555f1..43ad1b7d82 100644 --- a/src/test/test-extract-word.c +++ b/src/test/test-extract-word.c @@ -489,7 +489,7 @@ static void test_extract_first_word_and_warn(void) { static void test_extract_many_words(void) { const char *p, *original; - char *a, *b, *c; + char *a, *b, *c, *d, *e, *f; p = original = "foobar waldi piep"; assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 3); @@ -501,6 +501,24 @@ static void test_extract_many_words(void) { free(b); free(c); + p = original = "foobar:waldi:piep ba1:ba2"; + assert_se(extract_many_words(&p, ":" WHITESPACE, 0, &a, &b, &c, NULL) == 3); + assert_se(!isempty(p)); + assert_se(streq_ptr(a, "foobar")); + assert_se(streq_ptr(b, "waldi")); + assert_se(streq_ptr(c, "piep")); + assert_se(extract_many_words(&p, ":" WHITESPACE, 0, &d, &e, &f, NULL) == 2); + assert_se(isempty(p)); + assert_se(streq_ptr(d, "ba1")); + assert_se(streq_ptr(e, "ba2")); + assert_se(isempty(f)); + free(a); + free(b); + free(c); + free(d); + free(e); + free(f); + p = original = "'foobar' wa\"ld\"i "; assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 2); assert_se(isempty(p)); From 1e198efcdb1423847642e141c8b296e544707a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 3 Aug 2020 19:06:16 +0200 Subject: [PATCH 2/4] basic/extract-word: add EXTRACT_UNESCAPE_SEPARATORS mode This allows separators to be escaped, for example to allow "a\:b:c", to be treated as "a:b", "c" with ":" as the separator. 
--- src/basic/extract-word.c | 25 +++++++++++++--------- src/basic/extract-word.h | 7 ++++--- src/test/test-extract-word.c | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index ac9bf6099d..1a53da334a 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -86,25 +86,30 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra return -EINVAL; } - if (flags & EXTRACT_CUNESCAPE) { + if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) { bool eight_bit = false; char32_t u; - r = cunescape_one(*p, (size_t) -1, &u, &eight_bit, false); - if (r < 0) { - if (flags & EXTRACT_CUNESCAPE_RELAX) { - s[sz++] = '\\'; - s[sz++] = c; - } else - return -EINVAL; - } else { + if ((flags & EXTRACT_CUNESCAPE) && + (r = cunescape_one(*p, (size_t) -1, &u, &eight_bit, false)) >= 0) { + /* A valid escaped sequence */ + assert(r >= 1); + (*p) += r - 1; if (eight_bit) s[sz++] = u; else sz += utf8_encode_unichar(s + sz, u); - } + } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) && + strchr(separators, **p)) + /* An escaped separator char */ + s[sz++] = c; + else if (flags & EXTRACT_CUNESCAPE_RELAX) { + s[sz++] = '\\'; + s[sz++] = c; + } else + return -EINVAL; } else s[sz++] = c; diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h index e2d433893a..f028577c40 100644 --- a/src/basic/extract-word.h +++ b/src/basic/extract-word.h @@ -7,9 +7,10 @@ typedef enum ExtractFlags { EXTRACT_RELAX = 1 << 0, EXTRACT_CUNESCAPE = 1 << 1, EXTRACT_CUNESCAPE_RELAX = 1 << 2, - EXTRACT_UNQUOTE = 1 << 3, - EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 4, - EXTRACT_RETAIN_ESCAPE = 1 << 5, + EXTRACT_UNESCAPE_SEPARATORS = 1 << 3, + EXTRACT_UNQUOTE = 1 << 4, + EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 5, + EXTRACT_RETAIN_ESCAPE = 1 << 6, } ExtractFlags; int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags); diff --git 
a/src/test/test-extract-word.c b/src/test/test-extract-word.c index 43ad1b7d82..c71e4d32bf 100644 --- a/src/test/test-extract-word.c +++ b/src/test/test-extract-word.c @@ -341,6 +341,46 @@ static void test_extract_first_word(void) { assert_se(streq(t, "foo\\xbar")); free(t); assert_se(p == NULL); + + p = "\\:"; + assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1); + assert_se(streq(t, ":")); + free(t); + assert_se(p == NULL); + + p = "a\\:b"; + assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1); + assert_se(streq(t, "a:b")); + free(t); + assert_se(p == NULL); + + p = "a\\ b:c"; + assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1); + assert_se(streq(t, "a b")); + free(t); + assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1); + assert_se(streq(t, "c")); + free(t); + assert_se(p == NULL); + + p = "\\:"; + assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == -EINVAL); + + p = "a\\:b"; + assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == -EINVAL); + assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == 1); + assert_se(streq(t, "b")); + free(t); + + p = "a\\ b:c"; + assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == -EINVAL); + assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == 1); + assert_se(streq(t, "b")); + free(t); + assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == 1); + assert_se(streq(t, "c")); + free(t); + assert_se(p == NULL); } static void test_extract_first_word_and_warn(void) { From a082edd53ac9da4a8e06281360754eb15bd1389f Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 7 Jul 2020 17:12:48 +0100 Subject: [PATCH 3/4] strv: add strv_split_colon_pairs function Given a string in the format 'one:two three four:five', returns a string vector with each word. 
If the second element of the tuple is not present, an empty string is returned in its place, so that the vector can be processed in pairs. [zjs: use EXTRACT_UNESCAPE_SEPARATORS instead of EXTRACT_CUNESCAPE_RELAX. This way we do escaping exactly once and in normal strict mode.] --- src/basic/strv.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ src/basic/strv.h | 5 +++++ src/test/test-strv.c | 30 +++++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/src/basic/strv.c b/src/basic/strv.c index 858e1e62ec..a172ca2fe9 100644 --- a/src/basic/strv.c +++ b/src/basic/strv.c @@ -353,6 +353,58 @@ int strv_split_extract(char ***t, const char *s, const char *separators, Extract return (int) n; } +int strv_split_colon_pairs(char ***t, const char *s) { + _cleanup_strv_free_ char **l = NULL; + size_t n = 0, allocated = 0; + int r; + + assert(t); + assert(s); + + for (;;) { + _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL, *second_or_empty = NULL; + + r = extract_first_word(&s, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + const char *p = tuple; + r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, + &first, &second, NULL); + if (r < 0) + return r; + if (r == 0) + continue; + /* Enforce that at most 2 colon-separated words are contained in each group */ + if (!isempty(p)) + return -EINVAL; + + second_or_empty = strdup(strempty(second)); + if (!second_or_empty) + return -ENOMEM; + + if (!GREEDY_REALLOC(l, allocated, n + 3)) + return -ENOMEM; + + l[n++] = TAKE_PTR(first); + l[n++] = TAKE_PTR(second_or_empty); + + l[n] = NULL; + } + + if (!l) { + l = new0(char*, 1); + if (!l) + return -ENOMEM; + } + + *t = TAKE_PTR(l); + + return (int) n; +} + char *strv_join_prefix(char * const *l, const char *separator, const char *prefix) { char * const *s; char *r, *e; diff --git a/src/basic/strv.h b/src/basic/strv.h index 2ad927bce5..e57dfff69b 100644 --- 
a/src/basic/strv.h +++ b/src/basic/strv.h @@ -80,6 +80,11 @@ char **strv_split_newlines(const char *s); int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags); +/* Given a string containing white-space separated tuples of words themselves separated by ':', + * returns a vector of strings. If the second element in a tuple is missing, the corresponding + * string in the vector is an empty string. */ +int strv_split_colon_pairs(char ***t, const char *s); + char *strv_join_prefix(char * const *l, const char *separator, const char *prefix); static inline char *strv_join(char * const *l, const char *separator) { return strv_join_prefix(l, separator, NULL); diff --git a/src/test/test-strv.c b/src/test/test-strv.c index cba5441d4b..fda5948f49 100644 --- a/src/test/test-strv.c +++ b/src/test/test-strv.c @@ -407,6 +407,35 @@ static void test_strv_split_extract(void) { assert_se(streq_ptr(l[5], NULL)); } +static void test_strv_split_colon_pairs(void) { + _cleanup_strv_free_ char **l = NULL; + const char *str = "one:two three four:five six seven:eight\\:nine ten\\:eleven\\\\", + *str_inval="one:two three:four:five"; + int r; + + log_info("/* %s */", __func__); + + r = strv_split_colon_pairs(&l, str); + assert_se(r == (int) strv_length(l)); + assert_se(r == 12); + assert_se(streq_ptr(l[0], "one")); + assert_se(streq_ptr(l[1], "two")); + assert_se(streq_ptr(l[2], "three")); + assert_se(streq_ptr(l[3], "")); + assert_se(streq_ptr(l[4], "four")); + assert_se(streq_ptr(l[5], "five")); + assert_se(streq_ptr(l[6], "six")); + assert_se(streq_ptr(l[7], "")); + assert_se(streq_ptr(l[8], "seven")); + assert_se(streq_ptr(l[9], "eight:nine")); + assert_se(streq_ptr(l[10], "ten:eleven\\")); + assert_se(streq_ptr(l[11], "")); + assert_se(streq_ptr(l[12], NULL)); + + r = strv_split_colon_pairs(&l, str_inval); + assert_se(r == -EINVAL); +} + static void test_strv_split_newlines(void) { unsigned i = 0; char **s; @@ -998,6 +1027,7 @@ int main(int argc, char 
*argv[]) { test_strv_split(); test_strv_split_empty(); test_strv_split_extract(); + test_strv_split_colon_pairs(); test_strv_split_newlines(); test_strv_split_nulstr(); test_strv_parse_nulstr(); From b3d133148ea802e44ec913b2766c811ac2316f9a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 14 Jul 2020 16:18:41 +0100 Subject: [PATCH 4/4] core: new feature MountImages Follows the same pattern and features as RootImage, but allows an arbitrary mount point under / to be specified by the user, and multiple values - like BindPaths. Original implementation by @topimiettinen at: https://github.com/systemd/systemd/pull/14451 Reworked to use dissect's logic instead of bare libmount() calls and other review comments. Thanks Topi for the initial work to come up with and implement this useful feature. --- man/systemd.exec.xml | 36 +++++++ src/core/dbus-execute.c | 102 +++++++++++++++++++ src/core/execute.c | 15 +++ src/core/execute.h | 2 + src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/load-fragment.c | 88 +++++++++++++++++ src/core/load-fragment.h | 1 + src/core/namespace.c | 137 +++++++++++++++++++++++++- src/core/namespace.h | 13 +++ src/core/unit.c | 4 +- src/shared/bus-unit-util.c | 60 +++++++++++ src/systemctl/systemctl.c | 33 +++++++ src/test/test-namespace.c | 1 + src/test/test-ns.c | 2 + test/units/testsuite-50.sh | 20 ++++ 15 files changed, 512 insertions(+), 3 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index c5d755e897..87019dae4f 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -261,6 +261,42 @@ + + MountImages= + + This setting is similar to RootImage= in that it mounts a file + system hierarchy from a block device node or loopback file, but the destination directory can be + specified as well as mount options. This option expects a whitespace separated list of mount + definitions. Each definition consists of a colon-separated tuple of source path and destination + directory. 
Each mount definition may be prefixed with -, in which case it will be + ignored when its source path does not exist. The source argument is a path to a block device node or + regular file. If the source or destination contains a :, it needs to be escaped as + \:. + The device node or file system image file needs to follow the same rules as specified + for RootImage=. Any mounts created with this option are specific to the unit, and + are not visible in the host's mount table. + + This setting may be used more than once; each use appends to the unit's list of mount + paths. If the empty string is assigned, the entire list of mount paths defined prior to this is + reset. + + Note that the destination directory must exist or systemd must be able to create it. Thus, it + is not possible to use this option for mount points nested underneath paths specified in + InaccessiblePaths=, or under /home/ and other protected + directories if ProtectHome=yes is specified. + + When DevicePolicy= is set to closed or + strict, or set to auto and DeviceAllow= is + set, then this setting adds /dev/loop-control and /dev/mapper/control with rw mode, + and block-loop and block-blkext with rwm mode, + to DeviceAllow=. See + systemd.resource-control(5) + for details about DevicePolicy= and DeviceAllow=. Also, see + PrivateDevices= below, as it may change the setting of + DevicePolicy=.
+ + + diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 49729799ab..d5a24b9ab7 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -815,6 +815,40 @@ static int property_get_root_image_options( return sd_bus_message_close_container(reply); } +static int property_get_mount_images( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + int r; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + r = sd_bus_message_open_container(reply, 'a', "(ssb)"); + if (r < 0) + return r; + + for (size_t i = 0; i < c->n_mount_images; i++) { + r = sd_bus_message_append( + reply, "(ssb)", + c->mount_images[i].source, + c->mount_images[i].destination, + c->mount_images[i].ignore_enoent); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -863,6 +897,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RootHashSignature", "ay", property_get_root_hash_sig, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootHashSignaturePath", "s", NULL, offsetof(ExecContext, root_hash_sig_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootVerity", "s", NULL, offsetof(ExecContext, root_verity), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MountImages", "a(ssb)", property_get_mount_images, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CoredumpFilter", "t", property_get_coredump_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -2896,6 +2931,73 @@ int bus_exec_context_set_transient_property( return 1; } + } 
else if (streq(name, "MountImages")) { + _cleanup_free_ char *format_str = NULL; + MountImage *mount_images = NULL; + size_t n_mount_images = 0; + char *source, *destination; + int permissive; + + r = sd_bus_message_enter_container(message, 'a', "(ssb)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(ssb)", &source, &destination, &permissive)) > 0) { + char *tuple; + + if (!path_is_absolute(source)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source); + if (!path_is_normalized(source)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not normalized.", source); + if (!path_is_absolute(destination)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", destination); + if (!path_is_normalized(destination)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not normalized.", destination); + + tuple = strjoin(format_str, format_str ? " " : "", permissive ? 
"-" : "", source, ":", destination); + if (!tuple) + return -ENOMEM; + free_and_replace(format_str, tuple); + + r = mount_image_add(&mount_images, &n_mount_images, + &(MountImage) { + .source = source, + .destination = destination, + .ignore_enoent = permissive, + }); + if (r < 0) + return r; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + if (n_mount_images == 0) { + c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images); + + unit_write_settingf(u, flags, name, "%s=", name); + } else { + for (size_t i = 0; i < n_mount_images; ++i) { + r = mount_image_add(&c->mount_images, &c->n_mount_images, &mount_images[i]); + if (r < 0) + return r; + } + + unit_write_settingf(u, flags|UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS, + name, + "%s=%s", + name, + format_str); + } + } + + mount_images = mount_image_free_many(mount_images, &n_mount_images); + + return 1; } return 0; diff --git a/src/core/execute.c b/src/core/execute.c index 39ffcba580..123396f6f0 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1932,6 +1932,9 @@ static bool exec_needs_mount_namespace( if (context->n_temporary_filesystems > 0) return true; + if (context->n_mount_images > 0) + return true; + if (!IN_SET(context->mount_flags, 0, MS_SHARED)) return true; @@ -2570,6 +2573,9 @@ static bool insist_on_sandboxing( if (root_dir || root_image) return true; + if (context->n_mount_images > 0) + return true; + if (context->dynamic_user) return true; @@ -2669,6 +2675,8 @@ static int apply_mount_namespace( n_bind_mounts, context->temporary_filesystems, context->n_temporary_filesystems, + context->mount_images, + context->n_mount_images, tmp_dir, var_tmp_dir, context->log_namespace, @@ -4234,6 +4242,7 @@ void exec_context_done(ExecContext *c) { temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems); c->temporary_filesystems = NULL; c->n_temporary_filesystems = 0; + 
c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images); cpu_set_reset(&c->cpu_set); numa_policy_reset(&c->numa_policy); @@ -5025,6 +5034,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { else fprintf(f, "%d\n", c->syscall_errno); } + + for (i = 0; i < c->n_mount_images; i++) + fprintf(f, "%sMountImages: %s%s:%s\n", prefix, + c->mount_images[i].ignore_enoent ? "-": "", + c->mount_images[i].source, + c->mount_images[i].destination); } bool exec_context_maintains_privileges(const ExecContext *c) { diff --git a/src/core/execute.h b/src/core/execute.h index 349f583c1a..631279038d 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -239,6 +239,8 @@ struct ExecContext { size_t n_bind_mounts; TemporaryFileSystem *temporary_filesystems; size_t n_temporary_filesystems; + MountImage *mount_images; + size_t n_mount_images; uint64_t capability_bounding_set; uint64_t capability_ambient_set; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index a7c9bd9f71..b9e7769e4e 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -27,6 +27,7 @@ $1.RootImageOptions, config_parse_root_image_options, 0, $1.RootHash, config_parse_exec_root_hash, 0, offsetof($1, exec_context) $1.RootHashSignature, config_parse_exec_root_hash_sig, 0, offsetof($1, exec_context) $1.RootVerity, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_verity) +$1.MountImages, config_parse_mount_images, 0, offsetof($1, exec_context) $1.User, config_parse_user_group_compat, 0, offsetof($1, exec_context.user) $1.Group, config_parse_user_group_compat, 0, offsetof($1, exec_context.group) $1.SupplementaryGroups, config_parse_user_group_strv_compat, 0, offsetof($1, exec_context.supplementary_groups) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 2a2a5af58f..90eb52f432 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ 
-4675,6 +4675,94 @@ int config_parse_bind_paths( return 0; } +int config_parse_mount_images( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_strv_free_ char **l = NULL; + ExecContext *c = data; + const Unit *u = userdata; + char **source = NULL, **destination = NULL; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images); + return 0; + } + + r = strv_split_colon_pairs(&l, rvalue); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue); + return 0; + } + + STRV_FOREACH_PAIR(source, destination, l) { + _cleanup_free_ char *sresolved = NULL, *dresolved = NULL; + char *s = NULL; + bool permissive = false; + + r = unit_full_printf(u, *source, &sresolved); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to resolve unit specifiers in \"%s\", ignoring: %m", *source); + continue; + } + + s = sresolved; + if (s[0] == '-') { + permissive = true; + s++; + } + + r = path_simplify_and_warn(s, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue); + if (r < 0) + continue; + + if (isempty(*destination)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Missing destination in %s, ignoring: %s", lvalue, rvalue); + continue; + } + + r = unit_full_printf(u, *destination, &dresolved); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to resolve specifiers in \"%s\", ignoring: %m", *destination); + continue; + } + + r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue); + if (r < 0) + continue; + + r = mount_image_add(&c->mount_images, &c->n_mount_images, + &(MountImage) { + 
.source = s, + .destination = dresolved, + .ignore_enoent = permissive, + }); + if (r < 0) + return log_oom(); + } + + return 0; +} + int config_parse_job_timeout_sec( const char* unit, const char *filename, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 253de9467f..2672db5ace 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -128,6 +128,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_output_restricted); CONFIG_PARSER_PROTOTYPE(config_parse_crash_chvt); CONFIG_PARSER_PROTOTYPE(config_parse_timeout_abort); CONFIG_PARSER_PROTOTYPE(config_parse_swap_priority); +CONFIG_PARSER_PROTOTYPE(config_parse_mount_images); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/core/namespace.c b/src/core/namespace.c index 16d40fedc0..f2288df79b 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -15,6 +15,7 @@ #include "format-util.h" #include "fs-util.h" #include "label.h" +#include "list.h" #include "loop-util.h" #include "loopback-setup.h" #include "mkdir.h" @@ -40,6 +41,7 @@ typedef enum MountMode { /* This is ordered by priority! */ INACCESSIBLE, + MOUNT_IMAGES, BIND_MOUNT, BIND_MOUNT_RECURSIVE, PRIVATE_TMP, @@ -65,12 +67,13 @@ typedef struct MountEntry { bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */ bool applied:1; /* Already applied */ char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */ - const char *source_const; /* The source path, for bind mounts */ + const char *source_const; /* The source path, for bind mounts or images */ char *source_malloc; const char *options_const;/* Mount options for tmpfs */ char *options_malloc; unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. 
*/ unsigned n_followed; + LIST_FIELDS(MountEntry, mount_entry); } MountEntry; /* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted @@ -205,6 +208,7 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = { [READONLY] = "read-only", [READWRITE] = "read-write", [TMPFS] = "tmpfs", + [MOUNT_IMAGES] = "mount-images", [READWRITE_IMPLICIT] = "rw-implicit", }; @@ -325,6 +329,23 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n) return 0; } +static int append_mount_images(MountEntry **p, const MountImage *mount_images, size_t n) { + assert(p); + + for (size_t i = 0; i < n; i++) { + const MountImage *m = mount_images + i; + + *((*p)++) = (MountEntry) { + .path_const = m->destination, + .mode = MOUNT_IMAGES, + .source_const = m->source, + .ignore = m->ignore_enoent, + }; + } + + return 0; +} + static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) { assert(p); @@ -882,6 +903,61 @@ static int mount_tmpfs(const MountEntry *m) { return 1; } +static int mount_images(const MountEntry *m) { + _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; + _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; + _cleanup_free_ void *root_hash_decoded = NULL; + _cleanup_free_ char *verity_data = NULL, *hash_sig = NULL; + DissectImageFlags dissect_image_flags = m->read_only ? DISSECT_IMAGE_READ_ONLY : 0; + size_t root_hash_size = 0; + int r; + + r = verity_metadata_load(mount_entry_source(m), NULL, &root_hash_decoded, &root_hash_size, &verity_data, &hash_sig); + if (r < 0) + return log_debug_errno(r, "Failed to load root hash: %m"); + dissect_image_flags |= verity_data ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0; + + r = loop_device_make_by_path(mount_entry_source(m), + m->read_only ? 
O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */, + verity_data ? 0 : LO_FLAGS_PARTSCAN, + &loop_device); + if (r < 0) + return log_debug_errno(r, "Failed to create loop device for image: %m"); + + r = dissect_image(loop_device->fd, root_hash_decoded, root_hash_size, verity_data, NULL, dissect_image_flags, &dissected_image); + /* No partition table? Might be a single-filesystem image, try again */ + if (!verity_data && r < 0 && r == -ENOPKG) + r = dissect_image(loop_device->fd, root_hash_decoded, root_hash_size, verity_data, NULL, dissect_image_flags|DISSECT_IMAGE_NO_PARTITION_TABLE, &dissected_image); + if (r < 0) + return log_debug_errno(r, "Failed to dissect image: %m"); + + r = dissected_image_decrypt(dissected_image, NULL, root_hash_decoded, root_hash_size, verity_data, hash_sig, NULL, 0, dissect_image_flags, &decrypted_image); + if (r < 0) + return log_debug_errno(r, "Failed to decrypt dissected image: %m"); + + r = mkdir_p_label(mount_entry_path(m), 0755); + if (r < 0) + return log_debug_errno(r, "Failed to create destination directory %s: %m", mount_entry_path(m)); + r = umount_recursive(mount_entry_path(m), 0); + if (r < 0) + return log_debug_errno(r, "Failed to umount under destination directory %s: %m", mount_entry_path(m)); + + r = dissected_image_mount(dissected_image, mount_entry_path(m), UID_INVALID, dissect_image_flags); + if (r < 0) + return log_debug_errno(r, "Failed to mount image: %m"); + + if (decrypted_image) { + r = decrypted_image_relinquish(decrypted_image); + if (r < 0) + return log_debug_errno(r, "Failed to relinquish decrypted image: %m"); + } + + loop_device_relinquish(loop_device); + + return 1; +} + static int follow_symlink( const char *root_directory, MountEntry *m) { @@ -1031,6 +1107,9 @@ static int apply_mount( case PROCFS: return mount_procfs(m); + case MOUNT_IMAGES: + return mount_images(m); + default: assert_not_reached("Unknown mode"); } @@ -1149,6 +1228,7 @@ static size_t namespace_calculate_mounts( 
char** empty_directories, size_t n_bind_mounts, size_t n_temporary_filesystems, + size_t n_mount_images, const char* tmp_dir, const char* var_tmp_dir, const char* log_namespace, @@ -1178,6 +1258,7 @@ static size_t namespace_calculate_mounts( strv_length(inaccessible_paths) + strv_length(empty_directories) + n_bind_mounts + + n_mount_images + n_temporary_filesystems + ns_info->private_dev + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + @@ -1267,6 +1348,8 @@ int setup_namespace( size_t n_bind_mounts, const TemporaryFileSystem *temporary_filesystems, size_t n_temporary_filesystems, + const MountImage *mount_images, + size_t n_mount_images, const char* tmp_dir, const char* var_tmp_dir, const char *log_namespace, @@ -1374,6 +1457,7 @@ int setup_namespace( empty_directories, n_bind_mounts, n_temporary_filesystems, + n_mount_images, tmp_dir, var_tmp_dir, log_namespace, protect_home, protect_system); @@ -1427,6 +1511,10 @@ int setup_namespace( }; } + r = append_mount_images(&m, mount_images, n_mount_images); + if (r < 0) + goto finish; + if (ns_info->private_dev) { *(m++) = (MountEntry) { .path_const = "/dev", @@ -1741,6 +1829,53 @@ int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) { return 0; } +MountImage* mount_image_free_many(MountImage *m, size_t *n) { + size_t i; + + assert(n); + assert(m || *n == 0); + + for (i = 0; i < *n; i++) { + free(m[i].source); + free(m[i].destination); + } + + free(m); + *n = 0; + return NULL; +} + +int mount_image_add(MountImage **m, size_t *n, const MountImage *item) { + _cleanup_free_ char *s = NULL, *d = NULL; + MountImage *c; + + assert(m); + assert(n); + assert(item); + + s = strdup(item->source); + if (!s) + return -ENOMEM; + + d = strdup(item->destination); + if (!d) + return -ENOMEM; + + c = reallocarray(*m, *n + 1, sizeof(MountImage)); + if (!c) + return -ENOMEM; + + *m = c; + + c[(*n) ++] = (MountImage) { + .source = TAKE_PTR(s), + .destination = TAKE_PTR(d), + 
.ignore_enoent = item->ignore_enoent, + }; + + return 0; +} + void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n) { size_t i; diff --git a/src/core/namespace.h b/src/core/namespace.h index 258bd7c131..d1e0a28562 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -8,6 +8,8 @@ typedef struct NamespaceInfo NamespaceInfo; typedef struct BindMount BindMount; typedef struct TemporaryFileSystem TemporaryFileSystem; +typedef struct MountImage MountImage; +typedef struct MountEntry MountEntry; #include @@ -72,6 +74,12 @@ struct TemporaryFileSystem { char *options; }; +struct MountImage { + char *source; + char *destination; + bool ignore_enoent; +}; + int setup_namespace( const char *root_directory, const char *root_image, @@ -85,6 +93,8 @@ int setup_namespace( size_t n_bind_mounts, const TemporaryFileSystem *temporary_filesystems, size_t n_temporary_filesystems, + const MountImage *mount_images, + size_t n_mount_images, const char *tmp_dir, const char *var_tmp_dir, const char *log_namespace, @@ -132,6 +142,9 @@ void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n); int temporary_filesystem_add(TemporaryFileSystem **t, size_t *n, const char *path, const char *options); +MountImage* mount_image_free_many(MountImage *m, size_t *n); +int mount_image_add(MountImage **m, size_t *n, const MountImage *item); + const char* namespace_type_to_string(NamespaceType t) _const_; NamespaceType namespace_type_from_string(const char *s) _pure_; diff --git a/src/core/unit.c b/src/core/unit.c index 2c09def06f..d6eb4990fe 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4527,11 +4527,11 @@ int unit_patch_contexts(Unit *u) { cc->device_policy == CGROUP_DEVICE_POLICY_AUTO) cc->device_policy = CGROUP_DEVICE_POLICY_CLOSED; - if (ec->root_image && + if ((ec->root_image || !LIST_IS_EMPTY(ec->mount_images)) && (cc->device_policy != CGROUP_DEVICE_POLICY_AUTO || cc->device_allow)) { const char *p; - /* When RootImage= is specified, the following 
devices are touched. */ + /* When RootImage= or MountImages= is specified, the following devices are touched. */ FOREACH_STRING(p, "/dev/loop-control", "/dev/mapper/control") { r = cgroup_add_device_allow(cc, p, "rw"); if (r < 0) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 30a872342f..3f4ab65af8 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -18,6 +18,7 @@ #include "hostname-util.h" #include "in-addr-util.h" #include "ip-protocol-list.h" +#include "libmount-util.h" #include "locale-util.h" #include "log.h" #include "missing_fs.h" @@ -1522,6 +1523,65 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con return 1; } + if (streq(field, "MountImages")) { + _cleanup_strv_free_ char **l = NULL; + char **source = NULL, **destination = NULL; + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ssb)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ssb)"); + if (r < 0) + return bus_log_create_error(r); + + r = strv_split_colon_pairs(&l, p); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + + STRV_FOREACH_PAIR(source, destination, l) { + char *s = *source; + bool permissive = false; + + if (s[0] == '-') { + permissive = true; + s++; + } + + if (isempty(*destination)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Missing argument after ':': %s", + eq); + + r = sd_bus_message_append(m, "(ssb)", s, *destination, permissive); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = 
sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + return 0; } diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index c58a19a099..d55a89efab 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -5408,6 +5408,39 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m bus_print_property_value(name, expected_value, value, affinity); return 1; + } else if (streq(name, "MountImages")) { + _cleanup_free_ char *paths = NULL; + const char *source, *dest; + int ignore_enoent; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ssb)"); + if (r < 0) + return bus_log_parse_error(r); + + while ((r = sd_bus_message_read(m, "(ssb)", &source, &dest, &ignore_enoent)) > 0) { + _cleanup_free_ char *str = NULL; + + if (isempty(source)) + continue; + + if (asprintf(&str, "%s%s:%s", ignore_enoent ? "-" : "", source, dest) < 0) + return log_oom(); + + if (!strextend_with_separator(&paths, " ", str, NULL)) + return log_oom(); + } + if (r < 0) + return bus_log_parse_error(r); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return bus_log_parse_error(r); + + if (all || !isempty(paths)) + bus_print_property_value(name, expected_value, value, strempty(paths)); + + return 1; + } break; diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 95021ee7bf..f70b7e778e 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -159,6 +159,7 @@ static void test_protect_kernel_logs(void) { NULL, NULL, 0, NULL, 0, + NULL, 0, NULL, NULL, NULL, diff --git a/src/test/test-ns.c b/src/test/test-ns.c index ced287dd6e..cba8ee2b2b 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -71,6 +71,8 @@ int main(int argc, char *argv[]) { NULL, &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1, &(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1, + NULL, + 0, 
tmp_dir, var_tmp_dir, NULL, diff --git a/test/units/testsuite-50.sh b/test/units/testsuite-50.sh index 28144b378f..587184e854 100755 --- a/test/units/testsuite-50.sh +++ b/test/units/testsuite-50.sh @@ -155,6 +155,26 @@ journalctl -b -u testservice-50b.service | grep -F "squashfs" | grep -q -F "noat # Check that specifier escape is applied %%foo -> %foo busctl get-property org.freedesktop.systemd1 /org/freedesktop/systemd1/unit/testservice_2d50b_2eservice org.freedesktop.systemd1.Service RootImageOptions | grep -F "nosuid,dev,%foo" +# Now do some checks with MountImages, both by itself and in combination with RootImage, and as single FS or GPT image +systemd-run -t --property MountImages="${image}.gpt:/run/img1 ${image}.raw:/run/img2" /usr/bin/cat /run/img1/usr/lib/os-release | grep -q -F "MARKER=1" +systemd-run -t --property MountImages="${image}.gpt:/run/img1 ${image}.raw:/run/img2" /usr/bin/cat /run/img2/usr/lib/os-release | grep -q -F "MARKER=1" +systemd-run -t --property MountImages="${image}.raw:/run/img2\:3" /usr/bin/cat /run/img2:3/usr/lib/os-release | grep -q -F "MARKER=1" +systemd-run -t --property TemporaryFileSystem=/run --property RootImage=${image}.raw --property MountImages="${image}.gpt:/run/img1 ${image}.raw:/run/img2" /usr/bin/cat /usr/lib/os-release | grep -q -F "MARKER=1" +systemd-run -t --property TemporaryFileSystem=/run --property RootImage=${image}.raw --property MountImages="${image}.gpt:/run/img1 ${image}.raw:/run/img2" /usr/bin/cat /run/img1/usr/lib/os-release | grep -q -F "MARKER=1" +systemd-run -t --property TemporaryFileSystem=/run --property RootImage=${image}.gpt --property RootHash=${roothash} --property MountImages="${image}.gpt:/run/img1 ${image}.raw:/run/img2" /usr/bin/cat /run/img2/usr/lib/os-release | grep -q -F "MARKER=1" +cat >/run/systemd/system/testservice-50.service < /testok exit 0