From 1beab8b0d0ff2d7d1436b52d4a0c3d56dc908962 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 10 Aug 2018 15:07:14 +0200 Subject: [PATCH] namespace: be more careful when handling namespacing failures gracefully This makes two changes to the namespacing code: 1. We'll only gracefully skip service namespacing on access failure if exclusively sandboxing options were selected, and not mount-related options that result in a very different view of the world. For example, ignoring RootDirectory=, RootImage= or Bind= is really problematic, but ReadOnlyPaths= is just a weaker sandbox. 2. The namespacing code will now return a clearly recognizable error code when it cannot enforce its namespacing, so that we cannot confuse EPERM errors from mount() with those from unshare(). Only the errors from the first unshare() are now taken as a hint to gracefully disable namespacing. Fixes: #9844 #9835 --- src/core/execute.c | 13 +++++++++---- src/core/namespace.c | 9 +++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 67f6dbd600..ae90af9570 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2386,10 +2386,15 @@ static int apply_mount_namespace( bind_mount_free_many(bind_mounts, n_bind_mounts); - /* If we couldn't set up the namespace this is probably due to a - * missing capability. In this case, silently proceeed. */ - if (IN_SET(r, -EPERM, -EACCES)) { - log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); + /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports + * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively + * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a + * completely different execution environment. 
*/ + if (r == -ENOANO && + n_bind_mounts == 0 && context->n_temporary_filesystems == 0 && + !root_dir && !root_image && + !context->dynamic_user) { + log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring."); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 3488758e82..62518e1c4c 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1290,8 +1290,17 @@ int setup_namespace( normalize_mounts(root, mounts, &n_mounts); } + /* All above is just preparation, figuring out what to do. Let's now actually start doing something. */ + if (unshare(CLONE_NEWNS) < 0) { r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m"); + if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS)) + /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place + * that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable + * error back, which the caller can use to detect this case (and only this) and optionally + * continue without namespacing applied. */ + r = -ENOANO; + goto finish; }