nspawn: fix MS_SHARED mount propagation for userns containers
We want our OS trees to be MS_SHARED by default, so that our service namespacing logic can work correctly. Thus, in nspawn we mount everything MS_SHARED when organizing our tree. We do this early on, before changing the user namespace (if that's requested). However, CLONE_NEWUSER actually resets MS_SHARED to MS_SLAVE for all mounts (so that less privileged environments can't affect the more privileged ones). Hence, after invoking it we have to reset things to MS_SHARED again. This won't reestablish propagation, but it will make sure we get a new set of mount peer groups everywhere, which are then honoured for the mount namespaces/propagated mounts set up inside the container further down.
This commit is contained in:
parent
fe224669fb
commit
2a2e78e969
|
@ -2977,13 +2977,20 @@ static int inner_child(
|
||||||
|
|
||||||
/* Wait until the parent wrote the UID map */
|
/* Wait until the parent wrote the UID map */
|
||||||
if (!barrier_place_and_sync(barrier)) /* #2 */
|
if (!barrier_place_and_sync(barrier)) /* #2 */
|
||||||
return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
|
return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Parent died too early");
|
||||||
"Parent died too early");
|
|
||||||
}
|
|
||||||
|
|
||||||
r = reset_uid_gid();
|
/* Become the new root user inside our namespace */
|
||||||
if (r < 0)
|
r = reset_uid_gid();
|
||||||
return log_error_errno(r, "Couldn't become new root: %m");
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Couldn't become new root: %m");
|
||||||
|
|
||||||
|
/* Creating a new user namespace means all MS_SHARED mounts become MS_SLAVE. Let's put them
|
||||||
|
* back to MS_SHARED here, since that's what we want as defaults. (This will not reconnect
|
||||||
|
* propagation, but simply create new peer groups for all our mounts). */
|
||||||
|
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SHARED|MS_REC, NULL);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
r = mount_all(NULL,
|
r = mount_all(NULL,
|
||||||
arg_mount_settings | MOUNT_IN_USERNS,
|
arg_mount_settings | MOUNT_IN_USERNS,
|
||||||
|
@ -3350,9 +3357,8 @@ static int outer_child(
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
/* Mark everything as slave, so that we still
|
/* Mark everything as slave, so that we still receive mounts from the real root, but don't propagate
|
||||||
* receive mounts from the real root, but don't
|
* mounts to the real root. */
|
||||||
* propagate mounts to the real root. */
|
|
||||||
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
|
r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
@ -3598,9 +3604,8 @@ static int outer_child(
|
||||||
notify_socket = safe_close(notify_socket);
|
notify_socket = safe_close(notify_socket);
|
||||||
uid_shift_socket = safe_close(uid_shift_socket);
|
uid_shift_socket = safe_close(uid_shift_socket);
|
||||||
|
|
||||||
/* The inner child has all namespaces that are
|
/* The inner child has all namespaces that are requested, so that we all are owned by the
|
||||||
* requested, so that we all are owned by the user if
|
* user if user namespaces are turned on. */
|
||||||
* user namespaces are turned on. */
|
|
||||||
|
|
||||||
if (arg_network_namespace_path) {
|
if (arg_network_namespace_path) {
|
||||||
r = namespace_enter(-1, -1, netns_fd, -1, -1);
|
r = namespace_enter(-1, -1, netns_fd, -1, -1);
|
||||||
|
|
Loading…
Reference in New Issue