core: hide /run/credentials whenever namespacing is requested

Ideally we would like to hide all other services' credentials from every
service. That would require us to enable mount namespacing for all
services, which is something we cannot do, both due to compatibility
with the status quo ante, and because a number of services legitimately
should be able to install mounts in the host hierarchy.

Hence we do the second best thing: we hide the credentials automatically
for all services that otherwise opt into mount namespacing. This is
quite different from other mount sandboxing options: usually you have to
explicitly opt into each. However, given that the credentials logic is a
brand new concept we invented right here and now, and is particularly
security sensitive, it's OK to reverse this, and by default hide
credentials whenever we can (i.e. whenever mount namespacing is
otherwise opted into).

Long story short: if you want to hide other services' credentials, the
most basic option is to just turn on PrivateMounts= and there you go,
they should all be gone.
This commit is contained in:
Lennart Poettering 2020-08-14 15:54:48 +02:00
parent bb0c0d6f29
commit bbb4e7f39f
6 changed files with 47 additions and 0 deletions

View File

@ -3056,6 +3056,7 @@ static int apply_mount_namespace(
_cleanup_strv_free_ char **empty_directories = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
const char *root_dir = NULL, *root_image = NULL;
_cleanup_free_ char *creds_path = NULL;
NamespaceInfo ns_info;
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
@ -3124,6 +3125,12 @@ static int apply_mount_namespace(
if (context->mount_flags == MS_SHARED)
log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
if (exec_context_has_credentials(context) && params->prefix[EXEC_DIRECTORY_RUNTIME]) {
creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id);
if (!creds_path)
return -ENOMEM;
}
r = setup_namespace(root_dir, root_image, context->root_image_options,
&ns_info, context->read_write_paths,
needs_sandboxing ? context->read_only_paths : NULL,
@ -3137,6 +3144,7 @@ static int apply_mount_namespace(
context->n_mount_images,
tmp_dir,
var_tmp_dir,
creds_path,
context->log_namespace,
context->mount_flags,
context->root_hash, context->root_hash_size, context->root_hash_path,

View File

@ -537,6 +537,9 @@ int mount_setup(bool loaded_policy, bool leave_propagation) {
(void) mkdir_label("/run/systemd", 0755);
(void) mkdir_label("/run/systemd/system", 0755);
/* Make sure we have a mount point to hide in sandboxes */
(void) mkdir_label("/run/credentials", 0755);
/* Also create /run/systemd/inaccessible nodes, so that we always have something to mount
* inaccessible nodes from. If we run in a container the host might have created these for us already
* in /run/host/inaccessible/. Use those if we can, since that way we likely get access to block/char

View File

@ -1270,6 +1270,7 @@ static size_t namespace_calculate_mounts(
size_t n_mount_images,
const char* tmp_dir,
const char* var_tmp_dir,
const char *creds_path,
const char* log_namespace) {
size_t protect_home_cnt;
@ -1305,6 +1306,7 @@ static size_t namespace_calculate_mounts(
protect_home_cnt + protect_system_cnt +
(ns_info->protect_hostname ? 2 : 0) +
(namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0) +
(creds_path ? 2 : 1) +
!!log_namespace;
}
@ -1389,6 +1391,7 @@ int setup_namespace(
size_t n_mount_images,
const char* tmp_dir,
const char* var_tmp_dir,
const char *creds_path,
const char *log_namespace,
unsigned long mount_flags,
const void *root_hash,
@ -1494,6 +1497,7 @@ int setup_namespace(
n_temporary_filesystems,
n_mount_images,
tmp_dir, var_tmp_dir,
creds_path,
log_namespace);
if (n_mounts > 0) {
@ -1619,6 +1623,35 @@ int setup_namespace(
};
}
if (creds_path) {
/* If our service has a credentials store configured, then bind that one in, but hide
* everything else. */
*(m++) = (MountEntry) {
.path_const = "/run/credentials",
.mode = TMPFS,
.read_only = true,
.options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST,
.flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC,
};
*(m++) = (MountEntry) {
.path_const = creds_path,
.mode = BIND_MOUNT,
.read_only = true,
.source_const = creds_path,
};
} else {
/* If our service has no credentials store configured, then make the whole
* credentials tree inaccessible wholesale. */
*(m++) = (MountEntry) {
.path_const = "/run/credentials",
.mode = INACCESSIBLE,
.ignore = true,
};
}
if (log_namespace) {
_cleanup_free_ char *q;

View File

@ -117,6 +117,7 @@ int setup_namespace(
size_t n_mount_images,
const char *tmp_dir,
const char *var_tmp_dir,
const char *creds_path,
const char *log_namespace,
unsigned long mount_flags,
const void *root_hash,

View File

@ -163,6 +163,7 @@ static void test_protect_kernel_logs(void) {
NULL,
NULL,
NULL,
NULL,
0,
NULL,
0,

View File

@ -78,6 +78,7 @@ int main(int argc, char *argv[]) {
tmp_dir,
var_tmp_dir,
NULL,
NULL,
0,
NULL,
0,