Merge pull request #9744 from yuwata/fix-9737

Make RootImage= work with PrivateDevices=
This commit is contained in:
Lennart Poettering 2018-08-07 09:55:07 +02:00 committed by GitHub
commit 6f663594bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 50 deletions

View File

@ -124,7 +124,16 @@
partition table, or a file system within an MBR/MS-DOS or GPT partition table with only a single
Linux-compatible partition, or a set of file systems within a GPT partition table that follows the <ulink
url="https://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/">Discoverable Partitions
Specification</ulink>.</para></listitem>
Specification</ulink>.</para>
<para>When <varname>DevicePolicy=</varname> is set to <literal>closed</literal> or <literal>strict</literal>,
or set to <literal>auto</literal> and <varname>DeviceAllow=</varname> is set, then this setting adds
<filename>/dev/loop-control</filename> with <constant>rw</constant> mode, <literal>block-loop</literal> and
<literal>block-blkext</literal> with <constant>rwm</constant> mode to <varname>DeviceAllow=</varname>. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for the details about <varname>DevicePolicy=</varname> or <varname>DeviceAllow=</varname>. Also, see
<varname>PrivateDevices=</varname> below, as it may change the setting of <varname>DevicePolicy=</varname>.
</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -304,6 +304,35 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
}
}
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
_cleanup_free_ CGroupDeviceAllow *a = NULL;
_cleanup_free_ char *d = NULL;
assert(c);
assert(dev);
assert(isempty(mode) || in_charset(mode, "rwm"));
a = new(CGroupDeviceAllow, 1);
if (!a)
return -ENOMEM;
d = strdup(dev);
if (!d)
return -ENOMEM;
*a = (CGroupDeviceAllow) {
.path = TAKE_PTR(d),
.r = isempty(mode) || !!strchr(mode, 'r'),
.w = isempty(mode) || !!strchr(mode, 'w'),
.m = isempty(mode) || !!strchr(mode, 'm'),
};
LIST_PREPEND(device_allow, c->device_allow, a);
TAKE_PTR(a);
return 0;
}
static int lookup_block_device(const char *p, dev_t *ret) {
struct stat st;
int r;

View File

@ -137,6 +137,8 @@ void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
CGroupMask unit_get_own_mask(Unit *u);
CGroupMask unit_get_delegate_mask(Unit *u);
CGroupMask unit_get_members_mask(Unit *u);

View File

@ -3212,7 +3212,6 @@ int config_parse_device_allow(
_cleanup_free_ char *path = NULL, *resolved = NULL;
CGroupContext *c = data;
CGroupDeviceAllow *a;
const char *p = rvalue;
int r;
@ -3261,17 +3260,7 @@ int config_parse_device_allow(
return 0;
}
a = new0(CGroupDeviceAllow, 1);
if (!a)
return log_oom();
a->path = TAKE_PTR(resolved);
a->r = isempty(p) || !!strchr(p, 'r');
a->w = isempty(p) || !!strchr(p, 'w');
a->m = isempty(p) || !!strchr(p, 'm');
LIST_PREPEND(device_allow, c->device_allow, a);
return 0;
return cgroup_add_device_allow(c, resolved, p);
}
int config_parse_io_device_weight(

View File

@ -1029,24 +1029,21 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
return r;
}
static bool namespace_info_mount_apivfs(const char *root_directory, const NamespaceInfo *ns_info) {
static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
assert(ns_info);
/*
* ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=,
* since to protect the API VFS mounts, they need to be around in the
* first place... and RootDirectory= or RootImage= need to be set.
* first place...
*/
/* root_directory should point to a mount point */
return root_directory &&
(ns_info->mount_apivfs ||
ns_info->protect_control_groups ||
ns_info->protect_kernel_tunables);
return ns_info->mount_apivfs ||
ns_info->protect_control_groups ||
ns_info->protect_kernel_tunables;
}
static size_t namespace_calculate_mounts(
const char* root_directory,
const NamespaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
@ -1088,10 +1085,11 @@ static size_t namespace_calculate_mounts(
(ns_info->protect_control_groups ? 1 : 0) +
(ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
protect_home_cnt + protect_system_cnt +
(namespace_info_mount_apivfs(root_directory, ns_info) ? ELEMENTSOF(apivfs_table) : 0);
(namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0);
}
static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) {
assert(root_directory);
assert(n_mounts);
assert(mounts || *n_mounts == 0);
@ -1127,11 +1125,9 @@ int setup_namespace(
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
size_t root_hash_size = 0;
const char *root;
size_t n_mounts;
bool make_slave;
size_t n_mounts, root_hash_size = 0;
bool require_prefix = false;
const char *root;
int r = 0;
assert(ns_info);
@ -1181,7 +1177,6 @@ int setup_namespace(
}
n_mounts = namespace_calculate_mounts(
root,
ns_info,
read_write_paths,
read_only_paths,
@ -1192,9 +1187,6 @@ int setup_namespace(
tmp_dir, var_tmp_dir,
protect_home, protect_system);
/* Set mount slave mode */
make_slave = root || n_mounts > 0 || ns_info->private_mounts;
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix);
@ -1271,7 +1263,7 @@ int setup_namespace(
if (r < 0)
goto finish;
if (namespace_info_mount_apivfs(root, ns_info)) {
if (namespace_info_mount_apivfs(ns_info)) {
r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths);
if (r < 0)
goto finish;
@ -1284,7 +1276,7 @@ int setup_namespace(
if (r < 0)
goto finish;
normalize_mounts(root_directory, mounts, &n_mounts);
normalize_mounts(root, mounts, &n_mounts);
}
if (unshare(CLONE_NEWNS) < 0) {
@ -1292,13 +1284,11 @@ int setup_namespace(
goto finish;
}
if (make_slave) {
/* Remount / as SLAVE so that nothing now mounted in the namespace
shows up in the parent */
if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
r = -errno;
goto finish;
}
/* Remount / as SLAVE so that nothing now mounted in the namespace
* shows up in the parent */
if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
r = -errno;
goto finish;
}
if (root_image) {
@ -1328,7 +1318,7 @@ int setup_namespace(
}
}
} else if (root) {
} else {
/* Let's mount the main root directory to the root directory to use */
if (mount("/", root, NULL, MS_BIND|MS_REC, NULL) < 0) {
@ -1385,7 +1375,7 @@ int setup_namespace(
if (!again)
break;
normalize_mounts(root_directory, mounts, &n_mounts);
normalize_mounts(root, mounts, &n_mounts);
}
/* Create a blacklist we can pass to bind_mount_recursive() */
@ -1402,12 +1392,10 @@ int setup_namespace(
}
}
if (root) {
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
r = mount_move_root(root);
if (r < 0)
goto finish;
}
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
r = mount_move_root(root);
if (r < 0)
goto finish;
/* Remount / as the desired mode. Note that this will not
* reestablish propagation from our side to the host, since

View File

@ -4143,12 +4143,28 @@ int unit_patch_contexts(Unit *u) {
}
cc = unit_get_cgroup_context(u);
if (cc) {
if (cc && ec) {
if (ec &&
ec->private_devices &&
if (ec->private_devices &&
cc->device_policy == CGROUP_AUTO)
cc->device_policy = CGROUP_CLOSED;
if (ec->root_image &&
(cc->device_policy != CGROUP_AUTO || cc->device_allow)) {
/* When RootImage= is specified, the following devices are touched. */
r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw");
if (r < 0)
return r;
r = cgroup_add_device_allow(cc, "block-loop", "rwm");
if (r < 0)
return r;
r = cgroup_add_device_allow(cc, "block-blkext", "rwm");
if (r < 0)
return r;
}
}
return 0;