From 4e677599607d582367151280aa5d922f80fd962e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Nov 2019 12:23:17 +0100 Subject: [PATCH] core: be more lenient when checking whether sandboxing is necessary In some containers unshare() is made unavailable entirely. Let's deal with this that more gracefully and disable our sandboxing of services then, so that we work in a container, under the assumption the container manager is then responsible for sandboxing if we can't do it ourselves. Previously, we'd insist on sandboxing as soon as any form of BindPath= is used. With this change we only insist on it if we have a setting like that where source and destination differ, i.e. there's a mapping established that actually rearranges things, and thus would result in systematically different behaviour if skipped (as opposed to mappings that just make stuff read-only/writable that otherwise arent'). (Let's also update a test that intended to test for this behaviour with a more specific configuration that still triggers the behaviour with this change in place) Fixes: #13955 (For testing purposes unshare() can easily be blocked with systemd-nspawn --system-call-filter=~unshare.) --- src/core/execute.c | 60 +++++++++++++++---- .../exec-readonlypaths-with-bindpaths.service | 3 +- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 8ab4b18dc7..def73977fc 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2459,6 +2459,40 @@ finish: return r; } +static bool insist_on_sandboxing( + const ExecContext *context, + const char *root_dir, + const char *root_image, + const BindMount *bind_mounts, + size_t n_bind_mounts) { + + size_t i; + + assert(context); + assert(n_bind_mounts == 0 || bind_mounts); + + /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that + * would alter the view on the file system beyond making things read-only or invisble, i.e. would + * rearrange stuff in a way we cannot ignore gracefully. */ + + if (context->n_temporary_filesystems > 0) + return true; + + if (root_dir || root_image) + return true; + + if (context->dynamic_user) + return true; + + /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes + * essential. */ + for (i = 0; i < n_bind_mounts; i++) + if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination)) + return true; + + return false; +} + static int apply_mount_namespace( const Unit *u, const ExecCommand *command, @@ -2545,28 +2579,28 @@ static int apply_mount_namespace( DISSECT_IMAGE_DISCARD_ON_LOOP, error_path); - bind_mount_free_many(bind_mounts, n_bind_mounts); - /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a * completely different execution environment. */ if (r == -ENOANO) { - if (n_bind_mounts == 0 && - context->n_temporary_filesystems == 0 && - !root_dir && !root_image && - !context->dynamic_user) { + if (insist_on_sandboxing( + context, + root_dir, root_image, + bind_mounts, + n_bind_mounts)) { + log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n" + "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s", + n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user)); + + r = -EOPNOTSUPP; + } else { log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring."); - return 0; + r = 0; } - - log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n" - "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s", - n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user)); - - return -EOPNOTSUPP; } + bind_mount_free_many(bind_mounts, n_bind_mounts); return r; } diff --git a/test/test-execute/exec-readonlypaths-with-bindpaths.service b/test/test-execute/exec-readonlypaths-with-bindpaths.service index ea9211395d..438c7de704 100644 --- a/test/test-execute/exec-readonlypaths-with-bindpaths.service +++ b/test/test-execute/exec-readonlypaths-with-bindpaths.service @@ -3,7 +3,6 @@ Description=Test for ReadOnlyPaths= [Service] ReadOnlyPaths=/etc -/i-dont-exist /usr -# From 6c47cd7d3bf35c8158a0737f34fe2c5dc95e72d6, RuntimeDirectory= implies BindPaths=. -RuntimeDirectory=foo +BindPaths=/etc:/tmp/etc2 ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var' Type=oneshot