core: add new setting TemporaryFileSystem=

This introduces a new setting, TemporaryFileSystem=. It is useful for
hiding files that are not relevant to the processes invoked by the unit,
while necessary files or directories can still be accessed by combining
it with Bind{,ReadOnly}Paths=.
This commit is contained in:
Yu Watanabe 2018-02-21 09:17:52 +09:00
parent 4ca763a902
commit 2abd4e388a
8 changed files with 243 additions and 8 deletions

View file

@ -1793,6 +1793,9 @@ static bool exec_needs_mount_namespace(
if (context->n_bind_mounts > 0)
return true;
if (context->n_temporary_filesystems > 0)
return true;
if (context->mount_flags != 0)
return true;
@ -2371,6 +2374,8 @@ static int apply_mount_namespace(
empty_directories,
bind_mounts,
n_bind_mounts,
context->temporary_filesystems,
context->n_temporary_filesystems,
tmp,
var,
needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
@ -3623,6 +3628,9 @@ void exec_context_done(ExecContext *c) {
bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
c->bind_mounts = NULL;
c->n_bind_mounts = 0;
temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
c->temporary_filesystems = NULL;
c->n_temporary_filesystems = 0;
c->cpuset = cpu_set_mfree(c->cpuset);
@ -4181,6 +4189,16 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
c->bind_mounts[i].destination,
c->bind_mounts[i].recursive ? "rbind" : "norbind");
if (c->n_temporary_filesystems > 0)
for (i = 0; i < c->n_temporary_filesystems; i++) {
TemporaryFileSystem *t = c->temporary_filesystems + i;
fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
t->path,
isempty(t->options) ? "" : ":",
strempty(t->options));
}
if (c->utmp_id)
fprintf(f,
"%sUtmpIdentifier: %s\n",

View file

@ -219,6 +219,8 @@ struct ExecContext {
unsigned long mount_flags;
BindMount *bind_mounts;
unsigned n_bind_mounts;
TemporaryFileSystem *temporary_filesystems;
unsigned n_temporary_filesystems;
uint64_t capability_bounding_set;
uint64_t capability_ambient_set;

View file

@ -104,6 +104,7 @@ $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0,
$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
$1.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof($1, exec_context)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)

View file

@ -4174,6 +4174,83 @@ int config_parse_namespace_path_strv(
return 0;
}
/* Parses a TemporaryFileSystem= assignment.
 *
 * rvalue is a whitespace-separated (optionally quoted) list of entries of the form PATH[:OPTIONS]. For every
 * entry the unit specifiers in PATH are expanded, the result must be an absolute path, and the pair is
 * appended to the temporary file system list of the ExecContext passed in 'data'. An empty rvalue resets
 * the list.
 *
 * Invalid entries are logged and skipped. Returns 0 on success (including when entries were skipped), or
 * the result of log_oom() when running out of memory. */
int config_parse_temporary_filesystems(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        Unit *u = userdata;
        ExecContext *c = data;
        const char *cur;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        if (isempty(rvalue)) {
                /* Empty assignment resets the list */
                temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
                c->temporary_filesystems = NULL;
                c->n_temporary_filesystems = 0;
                return 0;
        }

        cur = rvalue;
        for (;;) {
                _cleanup_free_ char *word = NULL, *path = NULL, *resolved = NULL;
                const char *w;

                r = extract_first_word(&cur, &word, NULL, EXTRACT_QUOTES);
                if (r == 0)
                        break;
                if (r == -ENOMEM)
                        return log_oom();
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
                        return 0;
                }

                /* Split the entry at the first ':'. Afterwards 'path' holds the mount point and 'w' points
                 * to the remainder of the entry, i.e. the mount options (if any). */
                w = word;
                r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
                if (r == -ENOMEM)
                        return log_oom();
                if (r < 0) {
                        /* Like every other per-entry problem: complain and skip this entry only. */
                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", word);
                        continue;
                }
                if (r == 0) {
                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid syntax, ignoring: %s", word);
                        continue;
                }

                r = unit_full_printf(u, path, &resolved);
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve specifiers in %s, ignoring: %m", word);
                        continue;
                }

                if (!path_is_absolute(resolved)) {
                        log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute path, ignoring: %s", resolved);
                        continue;
                }

                path_kill_slashes(resolved);

                /* Store the specifier-expanded and normalized path, not the raw one taken from the unit file. */
                r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w);
                if (r == -ENOMEM)
                        return log_oom();
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse mount options, ignoring: %s", word);
                        continue;
                }
        }

        return 0;
}
int config_parse_bind_paths(
const char *unit,
const char *filename,

View file

@ -106,6 +106,7 @@ int config_parse_runtime_preserve_mode(const char *unit, const char *filename, u
int config_parse_exec_directories(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_set_status(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_temporary_filesystems(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_protect_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);

View file

@ -65,6 +65,7 @@ typedef enum MountMode {
PROCFS,
READONLY,
READWRITE,
TMPFS,
} MountMode;
typedef struct MountEntry {
@ -76,6 +77,9 @@ typedef struct MountEntry {
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *source_const; /* The source path, for bind mounts */
char *source_malloc;
const char *options_const;/* Mount options for tmpfs */
char *options_malloc;
unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */
} MountEntry;
/* If MountAPIVFS= is used, let's mount /sys and /proc into it, but only as a fallback if the user hasn't mounted
@ -185,11 +189,18 @@ static const char *mount_entry_source(const MountEntry *p) {
return p->source_malloc ?: p->source_const;
}
/* Returns the mount options string of the entry: the allocated copy if there is one, otherwise the
 * constant one (which may be NULL). */
static const char *mount_entry_options(const MountEntry *p) {
        assert(p);

        if (p->options_malloc)
                return p->options_malloc;

        return p->options_const;
}
/* Releases the strings owned by the entry. Only the *_malloc fields are touched; the *_const fields are
 * left alone. */
static void mount_entry_done(MountEntry *p) {
        assert(p);

        /* Each field is assigned the result of mfree(), so none keeps pointing at released memory. */
        p->options_malloc = mfree(p->options_malloc);
        p->source_malloc = mfree(p->source_malloc);
        p->path_malloc = mfree(p->path_malloc);
}
static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool forcibly_require_prefix) {
@ -243,6 +254,8 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) {
.ignore = false,
.has_prefix = false,
.read_only = true,
.options_const = "mode=755",
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
};
}
@ -269,6 +282,49 @@ static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n
return 0;
}
/* Appends one TMPFS mount entry per configured temporary file system to the array *p points into, and
 * advances *p past the entries written.
 *
 * The default mount flags are MS_NODEV|MS_STRICTATIME and the default mount data is "mode=0755". If an entry
 * carries options, they are run through mount_option_mangle() with "mode=0755," prepended, so options given
 * by the user come after the default in the resulting string. MS_RDONLY is never kept in the flags; a
 * read-only request is recorded in the entry's read_only field instead.
 *
 * Returns 0 on success, -EINVAL if a path is not absolute, -ENOMEM on allocation failure, or the error
 * returned by mount_option_mangle(). */
static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, unsigned n) {
        unsigned i;
        int r;

        assert(p);

        for (i = 0; i < n; i++) {
                const TemporaryFileSystem *t = tmpfs + i;
                _cleanup_free_ char *o = NULL, *str = NULL;
                unsigned long flags = MS_NODEV|MS_STRICTATIME;
                bool ro = false;

                if (!path_is_absolute(t->path))
                        return -EINVAL;

                if (!isempty(t->options)) {
                        str = strjoin("mode=0755,", t->options);
                        if (!str)
                                return -ENOMEM;

                        /* NOTE(review): presumably splits 'str' into mount flags (stored in 'flags') and
                         * the remaining file system specific options (stored in 'o') - confirm against
                         * mount_option_mangle(). */
                        r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
                        if (r < 0)
                                return r;

                        /* Read-only is tracked separately from the flags, hence strip the bit here. */
                        ro = !!(flags & MS_RDONLY);
                        flags &= ~MS_RDONLY;
                }

                *((*p)++) = (MountEntry) {
                        .path_const = t->path,
                        .mode = TMPFS,
                        .read_only = ro,
                        /* Fallback for entries without any options (or without file system specific
                         * ones): they get the same default mode as entries that do carry options.
                         * options_malloc takes precedence whenever it is set. */
                        .options_const = "mode=0755",
                        .options_malloc = o,
                        .flags = flags,
                };

                /* Ownership of the options string moved into the entry, don't free it on scope exit. */
                o = NULL;
        }

        return 0;
}
static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) {
unsigned i;
@ -711,15 +767,15 @@ static int mount_procfs(const MountEntry *m) {
return 1;
}
static int mount_empty_dir(const MountEntry *m) {
static int mount_tmpfs(const MountEntry *m) {
assert(m);
/* First, get rid of everything that is below if there is anything. Then, overmount with our new empty dir */
/* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */
(void) mkdir_p_label(mount_entry_path(m), 0755);
(void) umount_recursive(mount_entry_path(m), 0);
if (mount("tmpfs", mount_entry_path(m), "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, "mode=755") < 0)
if (mount("tmpfs", mount_entry_path(m), "tmpfs", m->flags, mount_entry_options(m)) < 0)
return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
return 1;
@ -821,7 +877,8 @@ static int apply_mount(
break;
case EMPTY_DIR:
return mount_empty_dir(m);
case TMPFS:
return mount_tmpfs(m);
case PRIVATE_TMP:
what = mount_entry_source(m);
@ -887,9 +944,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
assert(m);
assert(proc_self_mountinfo);
if (mount_entry_read_only(m))
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
else if (m->mode == PRIVATE_DEV) { /* Superblock can be readonly but the submounts can't */
if (mount_entry_read_only(m)) {
if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
/* Make superblock readonly */
if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0)
r = -errno;
} else
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
} else if (m->mode == PRIVATE_DEV) {
/* Superblock can be readonly but the submounts can't */
if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
r = -errno;
} else
@ -929,6 +992,7 @@ static unsigned namespace_calculate_mounts(
char** inaccessible_paths,
char** empty_directories,
unsigned n_bind_mounts,
unsigned n_temporary_filesystems,
const char* tmp_dir,
const char* var_tmp_dir,
ProtectHome protect_home,
@ -955,6 +1019,7 @@ static unsigned namespace_calculate_mounts(
strv_length(inaccessible_paths) +
strv_length(empty_directories) +
n_bind_mounts +
n_temporary_filesystems +
ns_info->private_dev +
(ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
(ns_info->protect_control_groups ? 1 : 0) +
@ -973,6 +1038,8 @@ int setup_namespace(
char** empty_directories,
const BindMount *bind_mounts,
unsigned n_bind_mounts,
const TemporaryFileSystem *temporary_filesystems,
unsigned n_temporary_filesystems,
const char* tmp_dir,
const char* var_tmp_dir,
ProtectHome protect_home,
@ -1024,7 +1091,7 @@ int setup_namespace(
if (root_directory)
root = root_directory;
else if (root_image || n_bind_mounts > 0) {
else if (root_image || n_bind_mounts > 0 || n_temporary_filesystems > 0) {
/* If we are booting from an image, create a mount point for the image, if it's still missing. We use
* the same mount point for all images, which is safe, since they all live in their own namespaces
@ -1046,6 +1113,7 @@ int setup_namespace(
inaccessible_paths,
empty_directories,
n_bind_mounts,
n_temporary_filesystems,
tmp_dir, var_tmp_dir,
protect_home, protect_system);
@ -1075,6 +1143,10 @@ int setup_namespace(
if (r < 0)
goto finish;
r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems);
if (r < 0)
goto finish;
if (tmp_dir) {
*(m++) = (MountEntry) {
.path_const = "/tmp",
@ -1305,6 +1377,57 @@ int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) {
return 0;
}
/* Frees an array of n TemporaryFileSystem items: the path and options strings of every item first, then
 * the array itself. t may be NULL only if n is 0. */
void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n) {
        unsigned idx;

        assert(t || n == 0);

        for (idx = 0; idx < n; idx++) {
                TemporaryFileSystem *item = t + idx;

                free(item->path);
                free(item->options);
        }

        free(t);
}
/* Appends a new item to the TemporaryFileSystem array *t, which currently holds *n items.
 *
 * Both 'path' and 'options' are duplicated; a NULL or empty 'options' is stored as NULL. On success the
 * array is grown by one element, *t and *n are updated and 0 is returned. On allocation failure -ENOMEM
 * is returned and *n is left unchanged. */
int temporary_filesystem_add(
                TemporaryFileSystem **t,
                unsigned *n,
                const char *path,
                const char *options) {

        _cleanup_free_ char *path_copy = NULL, *options_copy = NULL;
        TemporaryFileSystem *grown;

        assert(t);
        assert(n);
        assert(path);

        path_copy = strdup(path);
        if (!path_copy)
                return -ENOMEM;

        if (!isempty(options)) {
                options_copy = strdup(options);
                if (!options_copy)
                        return -ENOMEM;
        }

        grown = realloc_multiply(*t, sizeof(TemporaryFileSystem), *n + 1);
        if (!grown)
                return -ENOMEM;

        *t = grown;

        grown[*n] = (TemporaryFileSystem) {
                .path = path_copy,
                .options = options_copy,
        };
        (*n)++;

        /* The new array element owns both strings now, keep the cleanup handlers away from them. */
        path_copy = NULL;
        options_copy = NULL;

        return 0;
}
static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
_cleanup_free_ char *x = NULL;
char bid[SD_ID128_STRING_MAX];

View file

@ -23,6 +23,7 @@
typedef struct NamespaceInfo NamespaceInfo;
typedef struct BindMount BindMount;
typedef struct TemporaryFileSystem TemporaryFileSystem;
#include <stdbool.h>
@ -75,6 +76,11 @@ struct BindMount {
bool ignore_enoent:1;
};
/* One entry of the TemporaryFileSystem= setting: a tmpfs instance to mount inside the unit's mount namespace. */
struct TemporaryFileSystem {
char *path; /* Mount point of the tmpfs; must be an absolute path */
char *options; /* Mount options as configured for this entry, or NULL if none were given */
};
int setup_namespace(
const char *root_directory,
const char *root_image,
@ -85,6 +91,8 @@ int setup_namespace(
char **empty_directories,
const BindMount *bind_mounts,
unsigned n_bind_mounts,
const TemporaryFileSystem *temporary_filesystems,
unsigned n_temporary_filesystems,
const char *tmp_dir,
const char *var_tmp_dir,
ProtectHome protect_home,
@ -110,6 +118,10 @@ ProtectSystem parse_protect_system_or_bool(const char *s);
void bind_mount_free_many(BindMount *b, unsigned n);
int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item);
void temporary_filesystem_free_many(TemporaryFileSystem *t, unsigned n);
int temporary_filesystem_add(TemporaryFileSystem **t, unsigned *n,
const char *path, const char *options);
const char* namespace_type_to_string(NamespaceType t) _const_;
NamespaceType namespace_type_from_string(const char *s) _pure_;

View file

@ -86,6 +86,7 @@ int main(int argc, char *argv[]) {
(char **) inaccessible,
NULL,
&(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
&(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1,
tmp_dir,
var_tmp_dir,
PROTECT_HOME_NO,