pid1: create ro private tmp dirs when /tmp or /var/tmp is read-only

Read-only /var/tmp is more likely, because it's backed by a real device. /tmp
is (by default) backed by tmpfs, but it doesn't have to be. In both cases the
same consideration applies.

If we boot with read-only /var/tmp, any unit with PrivateTmp=yes would fail
because we cannot create the subdir under /var/tmp to mount the private directory.
But many services actually don't require /var/tmp (either because they only use
it occasionally, or because they only use /tmp, or even because they don't use the
temporary directories at all, and PrivateTmp=yes is used to isolate them from
the rest of the system).

To handle both cases let's create a read-only directory under /run/systemd and
mount it as the private /tmp or /var/tmp. (Read-only to not fool the service into
dumping too much data in /run.)

$ sudo systemd-run -t -p PrivateTmp=yes bash
Running as unit: run-u14.service
Press ^] three times within 1s to disconnect TTY.
[root@workstation /]# ls -l /tmp/
total 0
[root@workstation /]# ls -l /var/tmp/
total 0
[root@workstation /]# touch /tmp/f
[root@workstation /]# touch /var/tmp/f
touch: cannot touch '/var/tmp/f': Read-only file system

This commit has more changes than I like to put in one commit, but it's touching all
the same paths so it's hard to split.
exec_runtime_make() was using the wrong cleanup function, so the directory would be
left behind on error.
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2020-06-28 19:54:49 +02:00
parent cbc056c819
commit 56a13a495c
6 changed files with 175 additions and 126 deletions

View file

@ -2592,7 +2592,7 @@ static int apply_mount_namespace(
char **error_path) {
_cleanup_strv_free_ char **empty_directories = NULL;
char *tmp = NULL, *var = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
const char *root_dir = NULL, *root_image = NULL;
NamespaceInfo ns_info;
bool needs_sandboxing;
@ -2617,13 +2617,19 @@ static int apply_mount_namespace(
if (needs_sandboxing) {
/* The runtime struct only contains the parent of the private /tmp,
* which is non-accessible to world users. Inside of it there's a /tmp
* that is sticky, and that's the one we want to use here. */
* that is sticky, and that's the one we want to use here.
* This does not apply when we are using /run/systemd/empty as fallback. */
if (context->private_tmp && runtime) {
if (runtime->tmp_dir)
tmp = strjoina(runtime->tmp_dir, "/tmp");
if (runtime->var_tmp_dir)
var = strjoina(runtime->var_tmp_dir, "/tmp");
if (streq_ptr(runtime->tmp_dir, RUN_SYSTEMD_EMPTY))
tmp_dir = runtime->tmp_dir;
else if (runtime->tmp_dir)
tmp_dir = strjoina(runtime->tmp_dir, "/tmp");
if (streq_ptr(runtime->var_tmp_dir, RUN_SYSTEMD_EMPTY))
var_tmp_dir = runtime->var_tmp_dir;
else if (runtime->tmp_dir)
var_tmp_dir = strjoina(runtime->var_tmp_dir, "/tmp");
}
ns_info = (NamespaceInfo) {
@ -2661,8 +2667,8 @@ static int apply_mount_namespace(
n_bind_mounts,
context->temporary_filesystems,
context->n_temporary_filesystems,
tmp,
var,
tmp_dir,
var_tmp_dir,
context->log_namespace,
needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
@ -5347,28 +5353,25 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
(void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
/* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
if (destroy && rt->tmp_dir) {
if (destroy && rt->tmp_dir && !streq(rt->tmp_dir, RUN_SYSTEMD_EMPTY)) {
log_debug("Spawning thread to nuke %s", rt->tmp_dir);
r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
if (r < 0) {
if (r < 0)
log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
free(rt->tmp_dir);
}
rt->tmp_dir = NULL;
else
rt->tmp_dir = NULL;
}
if (destroy && rt->var_tmp_dir) {
if (destroy && rt->var_tmp_dir && !streq(rt->var_tmp_dir, RUN_SYSTEMD_EMPTY)) {
log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
if (r < 0) {
if (r < 0)
log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
free(rt->var_tmp_dir);
}
rt->var_tmp_dir = NULL;
else
rt->var_tmp_dir = NULL;
}
rt->id = mfree(rt->id);
@ -5382,16 +5385,22 @@ static void exec_runtime_freep(ExecRuntime **rt) {
(void) exec_runtime_free(*rt, false);
}
static int exec_runtime_allocate(ExecRuntime **ret) {
static int exec_runtime_allocate(ExecRuntime **ret, const char *id) {
_cleanup_free_ char *id_copy = NULL;
ExecRuntime *n;
assert(ret);
id_copy = strdup(id);
if (!id_copy)
return -ENOMEM;
n = new(ExecRuntime, 1);
if (!n)
return -ENOMEM;
*n = (ExecRuntime) {
.id = TAKE_PTR(id_copy),
.netns_storage_socket = { -1, -1 },
};
@ -5402,9 +5411,9 @@ static int exec_runtime_allocate(ExecRuntime **ret) {
static int exec_runtime_add(
Manager *m,
const char *id,
const char *tmp_dir,
const char *var_tmp_dir,
const int netns_storage_socket[2],
char **tmp_dir,
char **var_tmp_dir,
int netns_storage_socket[2],
ExecRuntime **ret) {
_cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
@ -5413,51 +5422,40 @@ static int exec_runtime_add(
assert(m);
assert(id);
/* tmp_dir, var_tmp_dir, netns_storage_socket fds are donated on success */
r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
if (r < 0)
return r;
r = exec_runtime_allocate(&rt);
r = exec_runtime_allocate(&rt, id);
if (r < 0)
return r;
rt->id = strdup(id);
if (!rt->id)
return -ENOMEM;
if (tmp_dir) {
rt->tmp_dir = strdup(tmp_dir);
if (!rt->tmp_dir)
return -ENOMEM;
/* When tmp_dir is set, then we require var_tmp_dir is also set. */
assert(var_tmp_dir);
rt->var_tmp_dir = strdup(var_tmp_dir);
if (!rt->var_tmp_dir)
return -ENOMEM;
}
if (netns_storage_socket) {
rt->netns_storage_socket[0] = netns_storage_socket[0];
rt->netns_storage_socket[1] = netns_storage_socket[1];
}
r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
if (r < 0)
return r;
assert(!!rt->tmp_dir == !!rt->var_tmp_dir); /* We require both to be set together */
rt->tmp_dir = TAKE_PTR(*tmp_dir);
rt->var_tmp_dir = TAKE_PTR(*var_tmp_dir);
if (netns_storage_socket) {
rt->netns_storage_socket[0] = TAKE_FD(netns_storage_socket[0]);
rt->netns_storage_socket[1] = TAKE_FD(netns_storage_socket[1]);
}
rt->manager = m;
if (ret)
*ret = rt;
/* do not remove created ExecRuntime object when the operation succeeds. */
rt = NULL;
TAKE_PTR(rt);
return 0;
}
static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
_cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL;
_cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
int r;
@ -5483,12 +5481,10 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
return -errno;
}
r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ret);
if (r < 0)
return r;
/* Avoid cleanup */
netns_storage_socket[0] = netns_storage_socket[1] = -1;
return 1;
}
@ -5606,14 +5602,10 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value,
rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
if (!rt) {
r = exec_runtime_allocate(&rt_create);
r = exec_runtime_allocate(&rt_create, u->id);
if (r < 0)
return log_oom();
rt_create->id = strdup(u->id);
if (!rt_create->id)
return log_oom();
rt = rt_create;
}
@ -5670,15 +5662,16 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value,
rt_create->manager = u->manager;
/* Avoid cleanup */
rt_create = NULL;
TAKE_PTR(rt_create);
}
return 1;
}
void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
int r, fd0 = -1, fd1 = -1;
int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
char *id = NULL;
int r, fdpair[] = {-1, -1};
const char *p, *v = value;
size_t n;
@ -5695,7 +5688,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
v = startswith(p, "tmp-dir=");
if (v) {
n = strcspn(v, " ");
tmp_dir = strndupa(v, n);
tmp_dir = strndup(v, n);
if (!tmp_dir)
return log_oom();
if (v[n] != ' ')
goto finalize;
p = v + n + 1;
@ -5704,7 +5699,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
v = startswith(p, "var-tmp-dir=");
if (v) {
n = strcspn(v, " ");
var_tmp_dir = strndupa(v, n);
var_tmp_dir = strndup(v, n);
if (!var_tmp_dir)
return log_oom();
if (v[n] != ' ')
goto finalize;
p = v + n + 1;
@ -5716,11 +5713,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
n = strcspn(v, " ");
buf = strndupa(v, n);
if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
log_debug("Unable to process exec-runtime netns fd specification.");
return;
}
fd0 = fdset_remove(fds, fd0);
if (safe_atoi(buf, &fdpair[0]) < 0 || !fdset_contains(fds, fdpair[0]))
return log_debug("Unable to process exec-runtime netns fd specification.");
fdpair[0] = fdset_remove(fds, fdpair[0]);
if (v[n] != ' ')
goto finalize;
p = v + n + 1;
@ -5732,18 +5727,16 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
n = strcspn(v, " ");
buf = strndupa(v, n);
if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
log_debug("Unable to process exec-runtime netns fd specification.");
return;
}
fd1 = fdset_remove(fds, fd1);
if (safe_atoi(buf, &fdpair[1]) < 0 || !fdset_contains(fds, fdpair[1]))
return log_debug("Unable to process exec-runtime netns fd specification.");
fdpair[1] = fdset_remove(fds, fdpair[1]);
}
finalize:
r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, fdpair, NULL);
if (r < 0)
log_debug_errno(r, "Failed to add exec-runtime: %m");
return log_debug_errno(r, "Failed to add exec-runtime: %m");
return 0;
}
void exec_runtime_vacuum(Manager *m) {

View file

@ -405,7 +405,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
void exec_runtime_vacuum(Manager *m);
void exec_params_clear(ExecParameters *p);

View file

@ -3685,7 +3685,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
else if ((val = startswith(l, "destroy-ipc-gid=")))
manager_deserialize_gid_refs_one(m, val);
else if ((val = startswith(l, "exec-runtime=")))
exec_runtime_deserialize_one(m, val, fds);
(void) exec_runtime_deserialize_one(m, val, fds);
else if ((val = startswith(l, "subscribed="))) {
if (strv_extend(&m->deserialized_subscribed, val) < 0)

View file

@ -43,6 +43,7 @@ typedef enum MountMode {
BIND_MOUNT,
BIND_MOUNT_RECURSIVE,
PRIVATE_TMP,
PRIVATE_TMP_READONLY,
PRIVATE_DEV,
BIND_DEV,
EMPTY_DIR,
@ -221,7 +222,7 @@ static const char *mount_entry_path(const MountEntry *p) {
static bool mount_entry_read_only(const MountEntry *p) {
assert(p);
return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE);
return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE, PRIVATE_TMP_READONLY);
}
static const char *mount_entry_source(const MountEntry *p) {
@ -1007,6 +1008,7 @@ static int apply_mount(
return mount_tmpfs(m);
case PRIVATE_TMP:
case PRIVATE_TMP_READONLY:
what = mount_entry_source(m);
make = true;
break;
@ -1398,17 +1400,21 @@ int setup_namespace(
goto finish;
if (tmp_dir) {
bool ro = streq(tmp_dir, RUN_SYSTEMD_EMPTY);
*(m++) = (MountEntry) {
.path_const = "/tmp",
.mode = PRIVATE_TMP,
.mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP,
.source_const = tmp_dir,
};
}
if (var_tmp_dir) {
bool ro = streq(var_tmp_dir, RUN_SYSTEMD_EMPTY);
*(m++) = (MountEntry) {
.path_const = "/var/tmp",
.mode = PRIVATE_TMP,
.mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP,
.source_const = var_tmp_dir,
};
}
@ -1815,10 +1821,28 @@ static int make_tmp_prefix(const char *prefix) {
}
static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
static int make_tmp_subdir(const char *parent, char **ret) {
_cleanup_free_ char *y = NULL;
RUN_WITH_UMASK(0000) {
y = strjoin(parent, "/tmp");
if (!y)
return -ENOMEM;
if (mkdir(y, 0777 | S_ISVTX) < 0)
return -errno;
}
if (ret)
*ret = TAKE_PTR(y);
return 0;
}
static int setup_one_tmp_dir(const char *id, const char *prefix, char **path, char **tmp_path) {
_cleanup_free_ char *x = NULL;
char bid[SD_ID128_STRING_MAX];
sd_id128_t boot_id;
bool rw = true;
int r;
assert(id);
@ -1841,49 +1865,55 @@ static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
return r;
RUN_WITH_UMASK(0077)
if (!mkdtemp(x))
return -errno;
if (!mkdtemp(x)) {
if (errno == EROFS || ERRNO_IS_DISK_SPACE(errno))
rw = false;
else
return -errno;
}
RUN_WITH_UMASK(0000) {
char *y;
if (rw) {
r = make_tmp_subdir(x, tmp_path);
if (r < 0)
return r;
} else {
/* Trouble: we failed to create the directory. Instead of failing, let's simulate /tmp being
* read-only. This way the service will get the EROFS result as if it was writing to the real
* file system. */
r = mkdir_p(RUN_SYSTEMD_EMPTY, 0500);
if (r < 0)
return r;
y = strjoina(x, "/tmp");
if (mkdir(y, 0777 | S_ISVTX) < 0)
return -errno;
x = strdup(RUN_SYSTEMD_EMPTY);
if (!x)
return -ENOMEM;
}
*path = TAKE_PTR(x);
return 0;
}
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
char *a, *b;
_cleanup_(namespace_cleanup_tmpdirp) char *a = NULL;
_cleanup_(rmdir_and_freep) char *a_tmp = NULL;
char *b;
int r;
assert(id);
assert(tmp_dir);
assert(var_tmp_dir);
r = setup_one_tmp_dir(id, "/tmp", &a);
r = setup_one_tmp_dir(id, "/tmp", &a, &a_tmp);
if (r < 0)
return r;
r = setup_one_tmp_dir(id, "/var/tmp", &b);
if (r < 0) {
char *t;
t = strjoina(a, "/tmp");
(void) rmdir(t);
(void) rmdir(a);
free(a);
r = setup_one_tmp_dir(id, "/var/tmp", &b, NULL);
if (r < 0)
return r;
}
*tmp_dir = a;
*var_tmp_dir = b;
a_tmp = mfree(a_tmp); /* avoid rmdir */
*tmp_dir = TAKE_PTR(a);
*var_tmp_dir = TAKE_PTR(b);
return 0;
}

View file

@ -12,7 +12,9 @@ typedef struct TemporaryFileSystem TemporaryFileSystem;
#include <stdbool.h>
#include "dissect-image.h"
#include "fs-util.h"
#include "macro.h"
#include "string-util.h"
typedef enum ProtectHome {
PROTECT_HOME_NO,
@ -98,6 +100,16 @@ int setup_namespace(
DissectImageFlags dissected_image_flags,
char **error_path);
#define RUN_SYSTEMD_EMPTY "/run/systemd/empty"
static inline void namespace_cleanup_tmpdir(char *p) {
PROTECT_ERRNO;
if (!streq_ptr(p, RUN_SYSTEMD_EMPTY))
(void) rmdir(p);
free(p);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(char*, namespace_cleanup_tmpdir);
int setup_tmp_dirs(
const char *id,
char **tmp_dir,

View file

@ -14,14 +14,25 @@
#include "util.h"
#include "virt.h"
static void test_namespace_cleanup_tmpdir(void) {
{
_cleanup_(namespace_cleanup_tmpdirp) char *dir;
assert_se(dir = strdup(RUN_SYSTEMD_EMPTY));
}
{
_cleanup_(namespace_cleanup_tmpdirp) char *dir;
assert_se(dir = strdup("/tmp/systemd-test-namespace.XXXXXX"));
assert_se(mkdtemp(dir));
}
}
static void test_tmpdir(const char *id, const char *A, const char *B) {
_cleanup_free_ char *a, *b;
struct stat x, y;
char *c, *d;
assert_se(setup_tmp_dirs(id, &a, &b) == 0);
assert_se(startswith(a, A));
assert_se(startswith(b, B));
assert_se(stat(a, &x) >= 0);
assert_se(stat(b, &y) >= 0);
@ -29,26 +40,27 @@ static void test_tmpdir(const char *id, const char *A, const char *B) {
assert_se(S_ISDIR(x.st_mode));
assert_se(S_ISDIR(y.st_mode));
assert_se((x.st_mode & 01777) == 0700);
assert_se((y.st_mode & 01777) == 0700);
if (!streq(a, RUN_SYSTEMD_EMPTY)) {
assert_se(startswith(a, A));
assert_se((x.st_mode & 01777) == 0700);
c = strjoina(a, "/tmp");
assert_se(stat(c, &x) >= 0);
assert_se(S_ISDIR(x.st_mode));
assert_se((x.st_mode & 01777) == 01777);
assert_se(rmdir(c) >= 0);
assert_se(rmdir(a) >= 0);
}
c = strjoina(a, "/tmp");
d = strjoina(b, "/tmp");
assert_se(stat(c, &x) >= 0);
assert_se(stat(d, &y) >= 0);
assert_se(S_ISDIR(x.st_mode));
assert_se(S_ISDIR(y.st_mode));
assert_se((x.st_mode & 01777) == 01777);
assert_se((y.st_mode & 01777) == 01777);
assert_se(rmdir(c) >= 0);
assert_se(rmdir(d) >= 0);
assert_se(rmdir(a) >= 0);
assert_se(rmdir(b) >= 0);
if (!streq(b, RUN_SYSTEMD_EMPTY)) {
assert_se(startswith(b, B));
assert_se((y.st_mode & 01777) == 0700);
d = strjoina(b, "/tmp");
assert_se(stat(d, &y) >= 0);
assert_se(S_ISDIR(y.st_mode));
assert_se((y.st_mode & 01777) == 01777);
assert_se(rmdir(d) >= 0);
assert_se(rmdir(b) >= 0);
}
}
static void test_netns(void) {
@ -180,6 +192,8 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_INFO);
test_namespace_cleanup_tmpdir();
if (!have_namespaces()) {
log_tests_skipped("Don't have namespace support");
return EXIT_TEST_SKIP;