core: be more lenient when checking whether sandboxing is necessary

In some containers unshare() is made unavailable entirely. Let's deal
with this that more gracefully and disable our sandboxing of services
then, so that we work in a container, under the assumption the container
manager is then responsible for sandboxing if we can't do it ourselves.

Previously, we'd insist on sandboxing as soon as any form of BindPath=
is used. With this change we only insist on it if we have a setting like
that where source and destination differ, i.e. there's a mapping
established that actually rearranges things, and thus would result in
systematically different behaviour if skipped (as opposed to mappings
that just make stuff read-only/writable that otherwise arent').

(Let's also update a test that intended to test for this behaviour with
a more specific configuration that still triggers the behaviour with
this change in place)

Fixes: #13955

(For testing purposes unshare() can easily be blocked with
systemd-nspawn --system-call-filter=~unshare.)
This commit is contained in:
Lennart Poettering 2019-11-20 12:23:17 +01:00
parent e884e00071
commit 4e67759960
2 changed files with 48 additions and 15 deletions

View File

@ -2459,6 +2459,40 @@ finish:
return r;
}
static bool insist_on_sandboxing(
const ExecContext *context,
const char *root_dir,
const char *root_image,
const BindMount *bind_mounts,
size_t n_bind_mounts) {
size_t i;
assert(context);
assert(n_bind_mounts == 0 || bind_mounts);
/* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
* would alter the view on the file system beyond making things read-only or invisble, i.e. would
* rearrange stuff in a way we cannot ignore gracefully. */
if (context->n_temporary_filesystems > 0)
return true;
if (root_dir || root_image)
return true;
if (context->dynamic_user)
return true;
/* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
* essential. */
for (i = 0; i < n_bind_mounts; i++)
if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
return true;
return false;
}
static int apply_mount_namespace(
const Unit *u,
const ExecCommand *command,
@ -2545,28 +2579,28 @@ static int apply_mount_namespace(
DISSECT_IMAGE_DISCARD_ON_LOOP,
error_path);
bind_mount_free_many(bind_mounts, n_bind_mounts);
/* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
* that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
* sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
* completely different execution environment. */
if (r == -ENOANO) {
if (n_bind_mounts == 0 &&
context->n_temporary_filesystems == 0 &&
!root_dir && !root_image &&
!context->dynamic_user) {
if (insist_on_sandboxing(
context,
root_dir, root_image,
bind_mounts,
n_bind_mounts)) {
log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
"Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
r = -EOPNOTSUPP;
} else {
log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
return 0;
r = 0;
}
log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
"Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
return -EOPNOTSUPP;
}
bind_mount_free_many(bind_mounts, n_bind_mounts);
return r;
}

View File

@ -3,7 +3,6 @@ Description=Test for ReadOnlyPaths=
[Service]
ReadOnlyPaths=/etc -/i-dont-exist /usr
# From 6c47cd7d3bf35c8158a0737f34fe2c5dc95e72d6, RuntimeDirectory= implies BindPaths=.
RuntimeDirectory=foo
BindPaths=/etc:/tmp/etc2
ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var'
Type=oneshot