Merge pull request #9852 from poettering/namespace-errno
namespace: be more careful when handling namespacing failures
This commit is contained in:
commit
52e4d62550
|
@ -750,6 +750,15 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
|||
<refsect1>
|
||||
<title>Sandboxing</title>
|
||||
|
||||
<para>The following sandboxing options are an effective way to limit the exposure of the system towards the unit's
|
||||
processes. It is recommended to turn on as many of these options for each unit as is possible without negatively
|
||||
affecting the process' ability to operate. Note that many of these sandboxing features are gracefully turned off on
|
||||
systems where the underlying security mechanism is not available. For example, <varname>ProtectSystem=</varname>
|
||||
has no effect if the kernel is built without file system namespacing or if the service manager runs in a container
|
||||
manager that makes file system namespacing unavailable to its payload. Similarly,
|
||||
<varname>RestrictRealtime=</varname> has no effect on systems that lack support for SECCOMP system call filtering,
|
||||
or in containers where support for this is turned off.</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
|
|
|
@ -2396,10 +2396,15 @@ static int apply_mount_namespace(
|
|||
|
||||
bind_mount_free_many(bind_mounts, n_bind_mounts);
|
||||
|
||||
/* If we couldn't set up the namespace this is probably due to a
|
||||
* missing capability. In this case, silently proceed. */
|
||||
if (IN_SET(r, -EPERM, -EACCES)) {
|
||||
log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
|
||||
/* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
|
||||
* that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
|
||||
* sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
|
||||
* completely different execution environment. */
|
||||
if (r == -ENOANO &&
|
||||
n_bind_mounts == 0 && context->n_temporary_filesystems == 0 &&
|
||||
!root_dir && !root_image &&
|
||||
!context->dynamic_user) {
|
||||
log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1313,8 +1313,17 @@ int setup_namespace(
|
|||
normalize_mounts(root, mounts, &n_mounts);
|
||||
}
|
||||
|
||||
/* All above is just preparation, figuring out what to do. Let's now actually start doing something. */
|
||||
|
||||
if (unshare(CLONE_NEWNS) < 0) {
|
||||
r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m");
|
||||
if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS))
|
||||
/* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place
|
||||
* that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable
|
||||
* error back, which the caller can use to detect this case (and only this) and optionally
|
||||
* continue without namespacing applied. */
|
||||
r = -ENOANO;
|
||||
|
||||
goto finish;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue