Merge pull request #9852 from poettering/namespace-errno

namespace: be more careful when handling namespacing failures
This commit is contained in:
Yu Watanabe 2018-08-22 11:16:29 +09:00 committed by GitHub
commit 52e4d62550
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 4 deletions

View File

@ -750,6 +750,15 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<refsect1>
<title>Sandboxing</title>
<para>The following sandboxing options are an effective way to limit the exposure of the system towards the unit's
processes. It is recommended to turn on as many of these options for each unit as is possible without negatively
affecting the process' ability to operate. Note that many of these sandboxing features are gracefully turned off on
systems where the underlying security mechanism is not available. For example, <varname>ProtectSystem=</varname>
has no effect if the kernel is built without file system namespacing or if the service manager runs in a container
manager that makes file system namespacing unavailable to its payload. Similarly,
<varname>RestrictRealtime=</varname> has no effect on systems that lack support for SECCOMP system call filtering,
or in containers where support for this is turned off.</para>
<variablelist>
<varlistentry>

View File

@ -2396,10 +2396,15 @@ static int apply_mount_namespace(
bind_mount_free_many(bind_mounts, n_bind_mounts);
/* If we couldn't set up the namespace this is probably due to a
* missing capability. In this case, silently proceed. */
if (IN_SET(r, -EPERM, -EACCES)) {
log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
/* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
* that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
* sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
* completely different execution environment. */
if (r == -ENOANO &&
n_bind_mounts == 0 && context->n_temporary_filesystems == 0 &&
!root_dir && !root_image &&
!context->dynamic_user) {
log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
return 0;
}

View File

@ -1313,8 +1313,17 @@ int setup_namespace(
normalize_mounts(root, mounts, &n_mounts);
}
/* All above is just preparation, figuring out what to do. Let's now actually start doing something. */
if (unshare(CLONE_NEWNS) < 0) {
r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m");
if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS))
/* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place
* that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable
* error back, which the caller can use to detect this case (and only this) and optionally
* continue without namespacing applied. */
r = -ENOANO;
goto finish;
}