nspawn: fix MS_SHARED mount propagation for userns containers

We want our OS trees to be MS_SHARED by default, so that our service
namespacing logic can work correctly. Thus in nspawn we mount everything
MS_SHARED when organizing our tree. We do this early on, before changing
the user namespace (if that's requested). However CLONE_NEWUSER actually
resets MS_SHARED to MS_SLAVE for all mounts (so that less privileged
environments can't affect the more privileged ones). Hence, when
invoking it we have to reset things to MS_SHARED afterwards again. This
won't reestablish propagation, but it will make sure we get a new set of
mount peer groups everywhere that then are honoured for the mount
namespaces/propagated mounts set up inside the container further down.
This commit is contained in:
Lennart Poettering 2020-07-23 11:13:44 +02:00
parent fe224669fb
commit 2a2e78e969
1 changed files with 17 additions and 12 deletions

View File

@ -2977,13 +2977,20 @@ static int inner_child(
/* Wait until the parent wrote the UID map */
if (!barrier_place_and_sync(barrier)) /* #2 */
return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
"Parent died too early");
}
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Parent died too early");
r = reset_uid_gid();
if (r < 0)
return log_error_errno(r, "Couldn't become new root: %m");
/* Become the new root user inside our namespace */
r = reset_uid_gid();
if (r < 0)
return log_error_errno(r, "Couldn't become new root: %m");
/* Creating a new user namespace means all MS_SHARED mounts become MS_SLAVE. Let's put them
* back to MS_SHARED here, since that's what we want as defaults. (This will not reconnect
* propagation, but simply create new peer groups for all our mounts). */
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SHARED|MS_REC, NULL);
if (r < 0)
return r;
}
r = mount_all(NULL,
arg_mount_settings | MOUNT_IN_USERNS,
@ -3350,9 +3357,8 @@ static int outer_child(
if (r < 0)
return r;
/* Mark everything as slave, so that we still
* receive mounts from the real root, but don't
* propagate mounts to the real root. */
/* Mark everything as slave, so that we still receive mounts from the real root, but don't propagate
* mounts to the real root. */
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
if (r < 0)
return r;
@ -3598,9 +3604,8 @@ static int outer_child(
notify_socket = safe_close(notify_socket);
uid_shift_socket = safe_close(uid_shift_socket);
/* The inner child has all namespaces that are
* requested, so that we all are owned by the user if
* user namespaces are turned on. */
/* The inner child has all namespaces that are requested, so that we all are owned by the
* user if user namespaces are turned on. */
if (arg_network_namespace_path) {
r = namespace_enter(-1, -1, netns_fd, -1, -1);