Merge pull request #16681 from poettering/hidepid
core: introduce ProtectProc= unit file setting for exposing procfs' hidepid= mount option
This commit is contained in:
commit
6944adbbe0
|
@ -151,6 +151,8 @@ All execution-related settings are available for transient units.
|
|||
✓ TimerSlackNSec=
|
||||
✓ NoNewPrivileges=
|
||||
✓ KeyringMode=
|
||||
✓ ProtectProc=
|
||||
✓ ProcSubset=
|
||||
✓ SystemCallFilter=
|
||||
✓ SystemCallArchitectures=
|
||||
✓ SystemCallErrorNumber=
|
||||
|
|
|
@ -267,6 +267,55 @@
|
|||
<xi:include href="system-only.xml" xpointer="singular"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>ProtectProc=</varname></term>
|
||||
|
||||
<listitem><para>Takes one of <literal>noaccess</literal>, <literal>invisible</literal>,
|
||||
<literal>ptraceable</literal> or <literal>default</literal> (which it defaults to). When set, this
|
||||
controls the <literal>hidepid=</literal> mount option of the <literal>procfs</literal> instance for
|
||||
the unit that controls which directories with process metainformation
|
||||
(<filename>/proc/<replaceable>PID</replaceable></filename>) are visible and accessible: when set to
|
||||
<literal>noaccess</literal> the ability to access most of other users' process metadata in
|
||||
<filename>/proc/</filename> is taken away for processes of the service. When set to
|
||||
<literal>invisible</literal> processes owned by other users are hidden from
|
||||
<filename>/proc/</filename>. If <literal>ptraceable</literal> all processes that cannot be
|
||||
<function>ptrace()</function>'ed by a process are hidden from it. If <literal>default</literal> no
|
||||
restrictions on <filename>/proc/</filename> access or visibility are made. For further details see
|
||||
<ulink url="https://www.kernel.org/doc/html/latest/filesystems/proc.html#mount-options">The /proc
|
||||
Filesystem</ulink>. It is generally recommended to run most system services with this option set to
|
||||
<literal>invisible</literal>. This option is implemented via file system namespacing, and thus cannot
|
||||
be used with services that shall be able to install mount points in the host file system
|
||||
hierarchy. It also cannot be used for services that need to access metainformation about other users'
|
||||
processes. This option implies <varname>MountAPIVFS=</varname>.</para>
|
||||
|
||||
<para>If the kernel doesn't support per-mount point <option>hidepid=</option> mount options this
|
||||
setting remains without effect, and the unit's processes will be able to access and see other processes
|
||||
as if the option was not used.</para>
|
||||
|
||||
<xi:include href="system-only.xml" xpointer="singular"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>ProcSubset=</varname></term>
|
||||
|
||||
<listitem><para>Takes one of <literal>all</literal> (the default) or <literal>pid</literal>. If
|
||||
the latter, all files and directories not directly associated with process management and introspection
|
||||
are made invisible in the <filename>/proc/</filename> file system configured for the unit's
|
||||
processes. This controls the <literal>subset=</literal> mount option of the <literal>procfs</literal>
|
||||
instance for the unit. For further details see <ulink
|
||||
url="https://www.kernel.org/doc/html/latest/filesystems/proc.html#mount-options">The /proc
|
||||
Filesystem</ulink>. Note that Linux exposes various kernel APIs via <filename>/proc/</filename>,
|
||||
which are made unavailable with this setting. Since these APIs are used frequently this option is
|
||||
useful only in a few, specific cases, and is not suitable for most non-trivial programs.</para>
|
||||
|
||||
<para>Much like <varname>ProtectProc=</varname> above, this is implemented via file system mount
|
||||
namespacing, and hence the same restrictions apply: it is only available to system services, it
|
||||
disables mount propagation to the host mount table, and it implies
|
||||
<varname>MountAPIVFS=</varname>. Also, like <varname>ProtectProc=</varname> this setting is gracefully
|
||||
disabled if the used kernel does not support the <literal>subset=</literal> mount option of
|
||||
<literal>procfs</literal>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>BindPaths=</varname></term>
|
||||
<term><varname>BindReadOnlyPaths=</varname></term>
|
||||
|
|
|
@ -50,6 +50,8 @@ struct security_info {
|
|||
bool ip_filters_custom_egress;
|
||||
|
||||
char *keyring_mode;
|
||||
char *protect_proc;
|
||||
char *proc_subset;
|
||||
bool lock_personality;
|
||||
bool memory_deny_write_execute;
|
||||
bool no_new_privileges;
|
||||
|
@ -135,6 +137,8 @@ static void security_info_free(struct security_info *i) {
|
|||
free(i->root_image);
|
||||
|
||||
free(i->keyring_mode);
|
||||
free(i->protect_proc);
|
||||
free(i->proc_subset);
|
||||
free(i->notify_access);
|
||||
|
||||
free(i->device_policy);
|
||||
|
@ -388,6 +392,44 @@ static int assess_keyring_mode(
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int assess_protect_proc(
|
||||
const struct security_assessor *a,
|
||||
const struct security_info *info,
|
||||
const void *data,
|
||||
uint64_t *ret_badness,
|
||||
char **ret_description) {
|
||||
|
||||
assert(ret_badness);
|
||||
assert(ret_description);
|
||||
|
||||
if (streq_ptr(info->protect_proc, "noaccess"))
|
||||
*ret_badness = 1;
|
||||
else if (STRPTR_IN_SET(info->protect_proc, "invisible", "ptraceable"))
|
||||
*ret_badness = 0;
|
||||
else
|
||||
*ret_badness = 3;
|
||||
|
||||
*ret_description = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int assess_proc_subset(
|
||||
const struct security_assessor *a,
|
||||
const struct security_info *info,
|
||||
const void *data,
|
||||
uint64_t *ret_badness,
|
||||
char **ret_description) {
|
||||
|
||||
assert(ret_badness);
|
||||
assert(ret_description);
|
||||
|
||||
*ret_badness = !streq_ptr(info->proc_subset, "pid");
|
||||
*ret_description = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int assess_notify_access(
|
||||
const struct security_assessor *a,
|
||||
const struct security_info *info,
|
||||
|
@ -1149,6 +1191,24 @@ static const struct security_assessor security_assessor_table[] = {
|
|||
.range = 1,
|
||||
.assess = assess_keyring_mode,
|
||||
},
|
||||
{
|
||||
.id = "ProtectProc=",
|
||||
.url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectProc=",
|
||||
.description_good = "Service has restricted access to process tree (/proc hidepid=)",
|
||||
.description_bad = "Service has full access to process tree (/proc hidepid=)",
|
||||
.weight = 1000,
|
||||
.range = 3,
|
||||
.assess = assess_protect_proc,
|
||||
},
|
||||
{
|
||||
.id = "ProcSubset=",
|
||||
.url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProcSubset=",
|
||||
.description_good = "Service has no access to non-process /proc files (/proc subset=)",
|
||||
.description_bad = "Service has full access to non-process /proc files (/proc subset=)",
|
||||
.weight = 10,
|
||||
.range = 1,
|
||||
.assess = assess_proc_subset,
|
||||
},
|
||||
{
|
||||
.id = "NotifyAccess=",
|
||||
.url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#NotifyAccess=",
|
||||
|
@ -1908,6 +1968,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
|
|||
{ "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
|
||||
{ "Id", "s", NULL, offsetof(struct security_info, id) },
|
||||
{ "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
|
||||
{ "ProtectProc", "s", NULL, offsetof(struct security_info, protect_proc) },
|
||||
{ "ProcSubset", "s", NULL, offsetof(struct security_info, proc_subset) },
|
||||
{ "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
|
||||
{ "LockPersonality", "b", NULL, offsetof(struct security_info, lock_personality) },
|
||||
{ "MemoryDenyWriteExecute", "b", NULL, offsetof(struct security_info, memory_deny_write_execute) },
|
||||
|
|
|
@ -47,6 +47,8 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInp
|
|||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_proc, protect_proc, ProtectProc);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_proc_subset, proc_subset, ProcSubset);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_home, protect_home, ProtectHome);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, ProtectSystem);
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
|
||||
|
@ -1016,6 +1018,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
|||
SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
|
||||
|
@ -1354,6 +1358,8 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(utmp_mode, ExecUtmpMode, exec_utmp_mode_fr
|
|||
static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_system, ProtectSystem, protect_system_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_home, ProtectHome, protect_home_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyring_mode_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_proc, ProtectProc, protect_proc_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE(proc_subset, ProcSubset, proc_subset_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string);
|
||||
static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality);
|
||||
static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check);
|
||||
|
@ -1706,6 +1712,12 @@ int bus_exec_context_set_transient_property(
|
|||
if (streq(name, "KeyringMode"))
|
||||
return bus_set_transient_keyring_mode(u, name, &c->keyring_mode, message, flags, error);
|
||||
|
||||
if (streq(name, "ProtectProc"))
|
||||
return bus_set_transient_protect_proc(u, name, &c->protect_proc, message, flags, error);
|
||||
|
||||
if (streq(name, "ProcSubset"))
|
||||
return bus_set_transient_proc_subset(u, name, &c->proc_subset, message, flags, error);
|
||||
|
||||
if (streq(name, "RuntimeDirectoryPreserve"))
|
||||
return bus_set_transient_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error);
|
||||
|
||||
|
|
|
@ -1948,7 +1948,9 @@ static bool exec_needs_mount_namespace(
|
|||
context->protect_kernel_tunables ||
|
||||
context->protect_kernel_modules ||
|
||||
context->protect_kernel_logs ||
|
||||
context->protect_control_groups)
|
||||
context->protect_control_groups ||
|
||||
context->protect_proc != PROTECT_PROC_DEFAULT ||
|
||||
context->proc_subset != PROC_SUBSET_ALL)
|
||||
return true;
|
||||
|
||||
if (context->root_directory) {
|
||||
|
@ -2650,6 +2652,10 @@ static int apply_mount_namespace(
|
|||
.protect_hostname = context->protect_hostname,
|
||||
.mount_apivfs = context->mount_apivfs,
|
||||
.private_mounts = context->private_mounts,
|
||||
.protect_home = context->protect_home,
|
||||
.protect_system = context->protect_system,
|
||||
.protect_proc = context->protect_proc,
|
||||
.proc_subset = context->proc_subset,
|
||||
};
|
||||
} else if (!context->dynamic_user && root_dir)
|
||||
/*
|
||||
|
@ -2680,8 +2686,6 @@ static int apply_mount_namespace(
|
|||
tmp_dir,
|
||||
var_tmp_dir,
|
||||
context->log_namespace,
|
||||
needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
|
||||
needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
|
||||
context->mount_flags,
|
||||
context->root_hash, context->root_hash_size, context->root_hash_path,
|
||||
context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
|
||||
|
@ -4601,7 +4605,9 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
|||
"%sRestrictRealtime: %s\n"
|
||||
"%sRestrictSUIDSGID: %s\n"
|
||||
"%sKeyringMode: %s\n"
|
||||
"%sProtectHostname: %s\n",
|
||||
"%sProtectHostname: %s\n"
|
||||
"%sProtectProc: %s\n"
|
||||
"%sProcSubset: %s\n",
|
||||
prefix, c->umask,
|
||||
prefix, c->working_directory ? c->working_directory : "/",
|
||||
prefix, c->root_directory ? c->root_directory : "/",
|
||||
|
@ -4623,7 +4629,9 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
|||
prefix, yes_no(c->restrict_realtime),
|
||||
prefix, yes_no(c->restrict_suid_sgid),
|
||||
prefix, exec_keyring_mode_to_string(c->keyring_mode),
|
||||
prefix, yes_no(c->protect_hostname));
|
||||
prefix, yes_no(c->protect_hostname),
|
||||
prefix, protect_proc_to_string(c->protect_proc),
|
||||
prefix, proc_subset_to_string(c->proc_subset));
|
||||
|
||||
if (c->root_image)
|
||||
fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
|
||||
|
|
|
@ -260,6 +260,9 @@ struct ExecContext {
|
|||
|
||||
char *log_namespace;
|
||||
|
||||
ProtectProc protect_proc; /* hidepid= */
|
||||
ProcSubset proc_subset; /* subset= */
|
||||
|
||||
bool private_tmp;
|
||||
bool private_network;
|
||||
bool private_devices;
|
||||
|
|
|
@ -73,6 +73,8 @@ $1.AmbientCapabilities, config_parse_capability_set, 0,
|
|||
$1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec)
|
||||
$1.NoNewPrivileges, config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges)
|
||||
$1.KeyringMode, config_parse_exec_keyring_mode, 0, offsetof($1, exec_context.keyring_mode)
|
||||
$1.ProtectProc, config_parse_protect_proc, 0, offsetof($1, exec_context.protect_proc)
|
||||
$1.ProcSubset, config_parse_proc_subset, 0, offsetof($1, exec_context.proc_subset)
|
||||
m4_ifdef(`HAVE_SECCOMP',
|
||||
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
|
||||
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
|
||||
|
|
|
@ -118,6 +118,8 @@ DEFINE_CONFIG_PARSE(config_parse_exec_secure_bits, secure_bits_from_string, "Fai
|
|||
DEFINE_CONFIG_PARSE_ENUM(config_parse_collect_mode, collect_mode, CollectMode, "Failed to parse garbage collection mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode, "Failed to parse keyring mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_proc, protect_proc, ProtectProc, "Failed to parse /proc/ protection mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_proc_subset, proc_subset, ProcSubset, "Failed to parse /proc/ subset mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode");
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
|
||||
|
|
|
@ -108,6 +108,8 @@ CONFIG_PARSER_PROTOTYPE(config_parse_user_group_strv_compat);
|
|||
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_namespaces);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_bind_paths);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_exec_keyring_mode);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_protect_proc);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_proc_subset);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields);
|
||||
|
|
|
@ -97,7 +97,7 @@ static const MountEntry protect_kernel_tunables_table[] = {
|
|||
{ "/proc/latency_stats", READONLY, true },
|
||||
{ "/proc/mtrr", READONLY, true },
|
||||
{ "/proc/scsi", READONLY, true },
|
||||
{ "/proc/sys", READONLY, false },
|
||||
{ "/proc/sys", READONLY, true },
|
||||
{ "/proc/sysrq-trigger", READONLY, true },
|
||||
{ "/proc/timer_stats", READONLY, true },
|
||||
{ "/sys", READONLY, false },
|
||||
|
@ -863,33 +863,66 @@ static int mount_sysfs(const MountEntry *m) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int mount_procfs(const MountEntry *m) {
|
||||
int r;
|
||||
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
|
||||
const char *entry_path;
|
||||
|
||||
assert(m);
|
||||
assert(ns_info);
|
||||
|
||||
(void) mkdir_p_label(mount_entry_path(m), 0755);
|
||||
entry_path = mount_entry_path(m);
|
||||
|
||||
r = path_is_mount_point(mount_entry_path(m), NULL, 0);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
|
||||
if (r > 0) /* make this a NOP if /proc is already a mount point */
|
||||
return 0;
|
||||
/* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
|
||||
* one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
|
||||
* our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
|
||||
* mounted on /proc/ first. */
|
||||
|
||||
/* Mount a new instance, so that we get the one that matches our user namespace, if we are running in one */
|
||||
if (mount("proc", mount_entry_path(m), "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
|
||||
return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
|
||||
(void) mkdir_p_label(entry_path, 0755);
|
||||
(void) umount_recursive(entry_path, 0);
|
||||
|
||||
if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
|
||||
ns_info->proc_subset != PROC_SUBSET_ALL) {
|
||||
_cleanup_free_ char *opts = NULL;
|
||||
|
||||
/* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
|
||||
* pretended to be per-instance but actually was per-namespace), hence let's make use of it
|
||||
* if requested. To make sure this logic succeeds only on kernels where hidepid= is
|
||||
* per-instance, we'll exclusively use the textual value for hidepid=, since support was
|
||||
* added in the same commit: if it's supported it is thus also per-instance. */
|
||||
|
||||
opts = strjoin("hidepid=",
|
||||
ns_info->protect_proc == PROTECT_PROC_DEFAULT ? "off" :
|
||||
protect_proc_to_string(ns_info->protect_proc),
|
||||
ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
|
||||
if (!opts)
|
||||
return -ENOMEM;
|
||||
|
||||
if (mount("proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts) < 0) {
|
||||
if (errno != EINVAL)
|
||||
return log_debug_errno(errno, "Failed to mount %s (options=%s): %m", mount_entry_path(m), opts);
|
||||
|
||||
/* If this failed with EINVAL then this likely means the textual hidepid= stuff is
|
||||
* not supported by the kernel, and thus the per-instance hidepid= neither, which
|
||||
* means we really don't want to use it, since it would affect our host's /proc
|
||||
* mount. Hence let's gracefully fallback to a classic, unrestricted version. */
|
||||
} else
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (mount("proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
|
||||
return log_debug_errno(errno, "Failed to mount %s (no options): %m", mount_entry_path(m));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int mount_tmpfs(const MountEntry *m) {
|
||||
const char *entry_path, *inner_path;
|
||||
int r;
|
||||
const char *entry_path = mount_entry_path(m);
|
||||
const char *source_path = m->path_const;
|
||||
|
||||
assert(m);
|
||||
|
||||
entry_path = mount_entry_path(m);
|
||||
inner_path = m->path_const;
|
||||
|
||||
/* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */
|
||||
|
||||
(void) mkdir_p_label(entry_path, 0755);
|
||||
|
@ -898,9 +931,9 @@ static int mount_tmpfs(const MountEntry *m) {
|
|||
if (mount("tmpfs", entry_path, "tmpfs", m->flags, mount_entry_options(m)) < 0)
|
||||
return log_debug_errno(errno, "Failed to mount %s: %m", entry_path);
|
||||
|
||||
r = label_fix_container(entry_path, source_path, 0);
|
||||
r = label_fix_container(entry_path, inner_path, 0);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, source_path);
|
||||
return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -995,7 +1028,8 @@ static int follow_symlink(
|
|||
|
||||
static int apply_mount(
|
||||
const char *root_directory,
|
||||
MountEntry *m) {
|
||||
MountEntry *m,
|
||||
const NamespaceInfo *ns_info) {
|
||||
|
||||
_cleanup_free_ char *inaccessible = NULL;
|
||||
bool rbind = true, make = false;
|
||||
|
@ -1003,6 +1037,7 @@ static int apply_mount(
|
|||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(ns_info);
|
||||
|
||||
log_debug("Applying namespace mount on %s", mount_entry_path(m));
|
||||
|
||||
|
@ -1107,7 +1142,7 @@ static int apply_mount(
|
|||
return mount_sysfs(m);
|
||||
|
||||
case PROCFS:
|
||||
return mount_procfs(m);
|
||||
return mount_procfs(m, ns_info);
|
||||
|
||||
case MOUNT_IMAGES:
|
||||
return mount_images(m);
|
||||
|
@ -1219,7 +1254,9 @@ static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
|
|||
|
||||
return ns_info->mount_apivfs ||
|
||||
ns_info->protect_control_groups ||
|
||||
ns_info->protect_kernel_tunables;
|
||||
ns_info->protect_kernel_tunables ||
|
||||
ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
|
||||
ns_info->proc_subset != PROC_SUBSET_ALL;
|
||||
}
|
||||
|
||||
static size_t namespace_calculate_mounts(
|
||||
|
@ -1233,25 +1270,23 @@ static size_t namespace_calculate_mounts(
|
|||
size_t n_mount_images,
|
||||
const char* tmp_dir,
|
||||
const char* var_tmp_dir,
|
||||
const char* log_namespace,
|
||||
ProtectHome protect_home,
|
||||
ProtectSystem protect_system) {
|
||||
const char* log_namespace) {
|
||||
|
||||
size_t protect_home_cnt;
|
||||
size_t protect_system_cnt =
|
||||
(protect_system == PROTECT_SYSTEM_STRICT ?
|
||||
(ns_info->protect_system == PROTECT_SYSTEM_STRICT ?
|
||||
ELEMENTSOF(protect_system_strict_table) :
|
||||
((protect_system == PROTECT_SYSTEM_FULL) ?
|
||||
((ns_info->protect_system == PROTECT_SYSTEM_FULL) ?
|
||||
ELEMENTSOF(protect_system_full_table) :
|
||||
((protect_system == PROTECT_SYSTEM_YES) ?
|
||||
((ns_info->protect_system == PROTECT_SYSTEM_YES) ?
|
||||
ELEMENTSOF(protect_system_yes_table) : 0)));
|
||||
|
||||
protect_home_cnt =
|
||||
(protect_home == PROTECT_HOME_YES ?
|
||||
(ns_info->protect_home == PROTECT_HOME_YES ?
|
||||
ELEMENTSOF(protect_home_yes_table) :
|
||||
((protect_home == PROTECT_HOME_READ_ONLY) ?
|
||||
((ns_info->protect_home == PROTECT_HOME_READ_ONLY) ?
|
||||
ELEMENTSOF(protect_home_read_only_table) :
|
||||
((protect_home == PROTECT_HOME_TMPFS) ?
|
||||
((ns_info->protect_home == PROTECT_HOME_TMPFS) ?
|
||||
ELEMENTSOF(protect_home_tmpfs_table) : 0)));
|
||||
|
||||
return !!tmp_dir + !!var_tmp_dir +
|
||||
|
@ -1355,8 +1390,6 @@ int setup_namespace(
|
|||
const char* tmp_dir,
|
||||
const char* var_tmp_dir,
|
||||
const char *log_namespace,
|
||||
ProtectHome protect_home,
|
||||
ProtectSystem protect_system,
|
||||
unsigned long mount_flags,
|
||||
const void *root_hash,
|
||||
size_t root_hash_size,
|
||||
|
@ -1389,10 +1422,10 @@ int setup_namespace(
|
|||
|
||||
/* Make the whole image read-only if we can determine that we only access it in a read-only fashion. */
|
||||
if (root_read_only(read_only_paths,
|
||||
protect_system) &&
|
||||
ns_info->protect_system) &&
|
||||
home_read_only(read_only_paths, inaccessible_paths, empty_directories,
|
||||
bind_mounts, n_bind_mounts, temporary_filesystems, n_temporary_filesystems,
|
||||
protect_home) &&
|
||||
ns_info->protect_home) &&
|
||||
strv_isempty(read_write_paths))
|
||||
dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
|
||||
|
||||
|
@ -1461,8 +1494,7 @@ int setup_namespace(
|
|||
n_temporary_filesystems,
|
||||
n_mount_images,
|
||||
tmp_dir, var_tmp_dir,
|
||||
log_namespace,
|
||||
protect_home, protect_system);
|
||||
log_namespace);
|
||||
|
||||
if (n_mounts > 0) {
|
||||
m = mounts = new0(MountEntry, n_mounts);
|
||||
|
@ -1559,11 +1591,11 @@ int setup_namespace(
|
|||
};
|
||||
}
|
||||
|
||||
r = append_protect_home(&m, protect_home, ns_info->ignore_protect_paths);
|
||||
r = append_protect_home(&m, ns_info->protect_home, ns_info->ignore_protect_paths);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
r = append_protect_system(&m, protect_system, false);
|
||||
r = append_protect_system(&m, ns_info->protect_system, false);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
|
@ -1720,7 +1752,7 @@ int setup_namespace(
|
|||
break;
|
||||
}
|
||||
|
||||
r = apply_mount(root, m);
|
||||
r = apply_mount(root, m, ns_info);
|
||||
if (r < 0) {
|
||||
if (error_path && mount_entry_path(m))
|
||||
*error_path = strdup(mount_entry_path(m));
|
||||
|
@ -2240,3 +2272,19 @@ static const char* const namespace_type_table[] = {
|
|||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType);
|
||||
|
||||
/* Textual names of the ProtectProc values, indexed by enum value. Apart from "default", the strings returned by protect_proc_to_string() are what mount_procfs() passes as the hidepid= mount option value. */
static const char* const protect_proc_table[_PROTECT_PROC_MAX] = {
|
||||
[PROTECT_PROC_DEFAULT] = "default", /* configuration spelling only; mount_procfs() passes "off" to the kernel instead */
|
||||
[PROTECT_PROC_NOACCESS] = "noaccess",
|
||||
[PROTECT_PROC_INVISIBLE] = "invisible",
|
||||
[PROTECT_PROC_PTRACEABLE] = "ptraceable",
|
||||
};
|
||||

||||
DEFINE_STRING_TABLE_LOOKUP(protect_proc, ProtectProc); /* NOTE(review): presumably defines protect_proc_to_string()/protect_proc_from_string() as declared in the header — confirm against the macro */
|
||||
|
||||
/* Textual names of the ProcSubset values, indexed by enum value. */
static const char* const proc_subset_table[_PROC_SUBSET_MAX] = {
|
||||
[PROC_SUBSET_ALL] = "all", /* mount_procfs() adds no subset= option for this value */
|
||||
[PROC_SUBSET_PID] = "pid", /* mount_procfs() appends ",subset=pid" for this value */
|
||||
};
|
||||

||||
DEFINE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset); /* NOTE(review): presumably defines proc_subset_to_string()/proc_subset_from_string() as declared in the header — confirm against the macro */
|
||||
|
|
|
@ -47,6 +47,22 @@ typedef enum ProtectSystem {
|
|||
_PROTECT_SYSTEM_INVALID = -1
|
||||
} ProtectSystem;
|
||||
|
||||
typedef enum ProtectProc { /* selects the hidepid= option used for a unit's procfs instance */
|
||||
PROTECT_PROC_DEFAULT, /* no restriction requested; mount_procfs() spells this "hidepid=off" when it has to pass options */
|
||||
PROTECT_PROC_NOACCESS, /* hidepid=noaccess */
|
||||
PROTECT_PROC_INVISIBLE, /* hidepid=invisible */
|
||||
PROTECT_PROC_PTRACEABLE, /* hidepid=ptraceable */
|
||||
_PROTECT_PROC_MAX, /* number of valid values; sizes protect_proc_table[] */
|
||||
_PROTECT_PROC_INVALID = -1, /* NOTE(review): presumably what protect_proc_from_string() yields for unknown strings — confirm */
|
||||
} ProtectProc;
|
||||
|
||||
typedef enum ProcSubset { /* selects the subset= option used for a unit's procfs instance */
|
||||
PROC_SUBSET_ALL, /* no subset= option is added to the procfs mount */
|
||||
PROC_SUBSET_PID, /* subset=pid */
|
||||
_PROC_SUBSET_MAX, /* number of valid values; sizes proc_subset_table[] */
|
||||
_PROC_SUBSET_INVALID = -1, /* NOTE(review): presumably what proc_subset_from_string() yields for unknown strings — confirm */
|
||||
} ProcSubset;
|
||||
|
||||
struct NamespaceInfo {
|
||||
bool ignore_protect_paths:1;
|
||||
bool private_dev:1;
|
||||
|
@ -57,6 +73,10 @@ struct NamespaceInfo {
|
|||
bool protect_kernel_logs:1;
|
||||
bool mount_apivfs:1;
|
||||
bool protect_hostname:1;
|
||||
ProtectHome protect_home;
|
||||
ProtectSystem protect_system;
|
||||
ProtectProc protect_proc;
|
||||
ProcSubset proc_subset;
|
||||
};
|
||||
|
||||
struct BindMount {
|
||||
|
@ -98,8 +118,6 @@ int setup_namespace(
|
|||
const char *tmp_dir,
|
||||
const char *var_tmp_dir,
|
||||
const char *log_namespace,
|
||||
ProtectHome protect_home,
|
||||
ProtectSystem protect_system,
|
||||
unsigned long mount_flags,
|
||||
const void *root_hash,
|
||||
size_t root_hash_size,
|
||||
|
@ -135,6 +153,12 @@ ProtectHome protect_home_from_string(const char *s) _pure_;
|
|||
const char* protect_system_to_string(ProtectSystem p) _const_;
|
||||
ProtectSystem protect_system_from_string(const char *s) _pure_;
|
||||
|
||||
const char* protect_proc_to_string(ProtectProc i) _const_;
|
||||
ProtectProc protect_proc_from_string(const char *s) _pure_;
|
||||
|
||||
const char* proc_subset_to_string(ProcSubset i) _const_;
|
||||
ProcSubset proc_subset_from_string(const char *s) _pure_;
|
||||
|
||||
void bind_mount_free_many(BindMount *b, size_t n);
|
||||
int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
|
||||
|
||||
|
|
|
@ -855,6 +855,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
|
|||
"RuntimeDirectoryPreserve",
|
||||
"Personality",
|
||||
"KeyringMode",
|
||||
"ProtectProc",
|
||||
"ProcSubset",
|
||||
"NetworkNamespacePath",
|
||||
"LogNamespace"))
|
||||
return bus_append_string(m, field, eq);
|
||||
|
|
|
@ -163,8 +163,6 @@ static void test_protect_kernel_logs(void) {
|
|||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
PROTECT_HOME_NO,
|
||||
PROTECT_SYSTEM_NO,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
|
|
|
@ -36,6 +36,8 @@ int main(int argc, char *argv[]) {
|
|||
.protect_control_groups = true,
|
||||
.protect_kernel_tunables = true,
|
||||
.protect_kernel_modules = true,
|
||||
.protect_proc = PROTECT_PROC_NOACCESS,
|
||||
.proc_subset = PROC_SUBSET_PID,
|
||||
};
|
||||
|
||||
char *root_directory;
|
||||
|
@ -76,8 +78,6 @@ int main(int argc, char *argv[]) {
|
|||
tmp_dir,
|
||||
var_tmp_dir,
|
||||
NULL,
|
||||
PROTECT_HOME_NO,
|
||||
PROTECT_SYSTEM_NO,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
|
|
|
@ -782,6 +782,8 @@ KEYMAP=
|
|||
KEYMAP_TOGGLE=
|
||||
KeepFree=
|
||||
KeyringMode=
|
||||
ProtectProc=
|
||||
ProcSubset=
|
||||
KillExcludeUsers=
|
||||
KillOnlyUsers=
|
||||
KillSignal=
|
||||
|
|
|
@ -23,11 +23,12 @@ NoNewPrivileges=yes
|
|||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
ReadWritePaths=/etc
|
||||
RestrictAddressFamilies=AF_UNIX
|
||||
|
|
|
@ -19,12 +19,13 @@ LockPersonality=yes
|
|||
MemoryDenyWriteExecute=yes
|
||||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
RestrictNamespaces=yes
|
||||
RestrictRealtime=yes
|
||||
|
|
|
@ -21,13 +21,14 @@ NoNewPrivileges=yes
|
|||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectClock=yes
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
RestrictNamespaces=yes
|
||||
|
|
|
@ -19,12 +19,13 @@ ExecStart=@rootlibexecdir@/systemd-journal-upload --save-state
|
|||
LockPersonality=yes
|
||||
MemoryDenyWriteExecute=yes
|
||||
PrivateDevices=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
RestrictNamespaces=yes
|
||||
RestrictRealtime=yes
|
||||
|
|
|
@ -23,12 +23,13 @@ NoNewPrivileges=yes
|
|||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
ReadWritePaths=/etc
|
||||
RestrictAddressFamilies=AF_UNIX
|
||||
|
|
|
@ -28,7 +28,6 @@ DeviceAllow=char-drm rw
|
|||
DeviceAllow=char-input rw
|
||||
DeviceAllow=char-tty rw
|
||||
DeviceAllow=char-vcs rw
|
||||
# Make sure the DeviceAllow= lines above can work correctly when referencing char-drm
|
||||
ExecStart=@rootlibexecdir@/systemd-logind
|
||||
FileDescriptorStoreMax=512
|
||||
IPAddressDeny=any
|
||||
|
@ -36,12 +35,13 @@ LockPersonality=yes
|
|||
MemoryDenyWriteExecute=yes
|
||||
NoNewPrivileges=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectClock=yes
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectSystem=strict
|
||||
ReadWritePaths=/etc /run
|
||||
Restart=always
|
||||
|
|
|
@ -26,13 +26,15 @@ ExecStart=!!@rootlibexecdir@/systemd-networkd
|
|||
LockPersonality=yes
|
||||
MemoryDenyWriteExecute=yes
|
||||
NoNewPrivileges=yes
|
||||
ProtectProc=invisible
|
||||
ProtectClock=yes
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectSystem=strict
|
||||
Restart=on-failure
|
||||
RestartKillSignal=SIGUSR2
|
||||
RestartSec=0
|
||||
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 AF_PACKET AF_ALG
|
||||
RestrictNamespaces=yes
|
||||
|
@ -44,7 +46,6 @@ SystemCallArchitectures=native
|
|||
SystemCallErrorNumber=EPERM
|
||||
SystemCallFilter=@system-service
|
||||
Type=notify
|
||||
RestartKillSignal=SIGUSR2
|
||||
User=systemd-network
|
||||
@SERVICE_WATCHDOG@
|
||||
|
||||
|
|
|
@ -28,12 +28,13 @@ MemoryDenyWriteExecute=yes
|
|||
NoNewPrivileges=yes
|
||||
PrivateDevices=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectClock=yes
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
Restart=always
|
||||
RestartSec=0
|
||||
|
|
|
@ -22,12 +22,13 @@ LockPersonality=yes
|
|||
MemoryDenyWriteExecute=yes
|
||||
NoNewPrivileges=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
ReadWritePaths=/etc
|
||||
RestrictAddressFamilies=AF_UNIX
|
||||
|
|
|
@ -27,12 +27,13 @@ MemoryDenyWriteExecute=yes
|
|||
NoNewPrivileges=yes
|
||||
PrivateDevices=yes
|
||||
PrivateTmp=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelLogs=yes
|
||||
ProtectSystem=strict
|
||||
Restart=always
|
||||
RestartSec=0
|
||||
|
|
|
@ -24,6 +24,7 @@ LockPersonality=yes
|
|||
MemoryDenyWriteExecute=yes
|
||||
NoNewPrivileges=yes
|
||||
PrivateDevices=yes
|
||||
ProtectProc=invisible
|
||||
ProtectControlGroups=yes
|
||||
ProtectHome=yes
|
||||
ProtectHostname=yes
|
||||
|
|
Loading…
Reference in New Issue