From aecd5ac6218f6291186b530b89cf2e97333fffc0 Mon Sep 17 00:00:00 2001 From: Topi Miettinen Date: Fri, 8 Feb 2019 19:25:00 +0200 Subject: [PATCH] core: ProtectHostname= feature Let services use a private UTS namespace. In addition, a seccomp filter is installed for set{host,domain}name() and read-only bind mounts are placed over /proc/sys/kernel/{host,domain}name. --- man/systemd.exec.xml | 11 ++++++++ src/core/dbus-execute.c | 4 +++ src/core/execute.c | 27 ++++++++++++++++--- src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/namespace.c | 12 +++++++++ src/core/namespace.h | 1 + src/shared/bus-unit-util.c | 2 +- src/shared/seccomp-util.c | 37 +++++++++++++++++++++++++++ src/shared/seccomp-util.h | 1 + 10 files changed, 93 insertions(+), 4 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index eb476327aa..ce374f326d 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1129,6 +1129,17 @@ BindReadOnlyPaths=/var/lib/systemd security. + + ProtectHostname= + + Takes a boolean argument. When set, sets up a new UTS namespace for the executed + processes. In addition, changing hostname or domainname is prevented. Defaults to off. + + Note that the implementation of this setting might be impossible (for example if UTS namespaces are not + available), and the unit should be written in a way that does not solely rely on this setting for + security.
+ + ProtectKernelTunables= diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 11301e4b69..f22bf4a371 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -777,6 +777,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST), /* Obsolete/redundant properties: */ SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), @@ -1153,6 +1154,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "LockPersonality")) return bus_set_transient_bool(u, name, &c->lock_personality, message, flags, error); + if (streq(name, "ProtectHostname")) + return bus_set_transient_bool(u, name, &c->protect_hostname, message, flags, error); + if (streq(name, "UtmpIdentifier")) return bus_set_transient_string(u, name, &c->utmp_id, message, flags, error); diff --git a/src/core/execute.c b/src/core/execute.c index a7082310ba..fe5cf96008 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1410,7 +1410,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) { c->private_devices || context_has_syscall_filters(c) || !set_isempty(c->syscall_archs) || - c->lock_personality; + c->lock_personality || + c->protect_hostname; } #if HAVE_SECCOMP @@ -2420,6 +2421,7 @@ static int apply_mount_namespace( .protect_control_groups = context->protect_control_groups, .protect_kernel_tunables = context->protect_kernel_tunables, .protect_kernel_modules = context->protect_kernel_modules, + 
.protect_hostname = context->protect_hostname, .mount_apivfs = context->mount_apivfs, .private_mounts = context->private_mounts, }; @@ -3286,6 +3288,23 @@ static int exec_child( } } + if (context->protect_hostname) { + if (ns_type_supported(NAMESPACE_UTS)) { + if (unshare(CLONE_NEWUTS) < 0) { + *exit_status = EXIT_NAMESPACE; + return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m"); + } + } else + log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup."); +#if HAVE_SECCOMP + r = seccomp_protect_hostname(); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m"); + } +#endif + } + /* Drop groups as early as possbile */ if (needs_setuid) { r = enforce_groups(gid, supplementary_gids, ngids); @@ -4163,7 +4182,8 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sIgnoreSIGPIPE: %s\n" "%sMemoryDenyWriteExecute: %s\n" "%sRestrictRealtime: %s\n" - "%sKeyringMode: %s\n", + "%sKeyringMode: %s\n" + "%sProtectHostname: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? 
c->root_directory : "/", @@ -4181,7 +4201,8 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->ignore_sigpipe), prefix, yes_no(c->memory_deny_write_execute), prefix, yes_no(c->restrict_realtime), - prefix, exec_keyring_mode_to_string(c->keyring_mode)); + prefix, exec_keyring_mode_to_string(c->keyring_mode), + prefix, yes_no(c->protect_hostname)); if (c->root_image) fprintf(f, "%sRootImage: %s\n", prefix, c->root_image); diff --git a/src/core/execute.h b/src/core/execute.h index 0f1bf56744..12a6e92b65 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -272,6 +272,7 @@ struct ExecContext { bool memory_deny_write_execute; bool restrict_realtime; + bool protect_hostname; bool oom_score_adjust_set:1; bool nice_set:1; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index cdbc67f885..38ac6a80fc 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -133,6 +133,7 @@ $1.LogsDirectoryMode, config_parse_mode, 0, $1.LogsDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_LOGS].paths) $1.ConfigurationDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].mode) $1.ConfigurationDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].paths) +$1.ProtectHostname, config_parse_bool, 0, offsetof($1, exec_context.protect_hostname) m4_ifdef(`HAVE_PAM', `$1.PAMName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.pam_name)', `$1.PAMName, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') diff --git a/src/core/namespace.c b/src/core/namespace.c index 7f553a42c2..d482c40c24 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1117,6 +1117,7 @@ static size_t namespace_calculate_mounts( (ns_info->protect_control_groups ? 1 : 0) + (ns_info->protect_kernel_modules ? 
ELEMENTSOF(protect_kernel_modules_table) : 0) + protect_home_cnt + protect_system_cnt + + (ns_info->protect_hostname ? 2 : 0) + (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0); } @@ -1301,6 +1302,17 @@ int setup_namespace( goto finish; } + if (ns_info->protect_hostname) { + *(m++) = (MountEntry) { + .path_const = "/proc/sys/kernel/hostname", + .mode = READONLY, + }; + *(m++) = (MountEntry) { + .path_const = "/proc/sys/kernel/domainname", + .mode = READONLY, + }; + } + assert(mounts + n_mounts == m); /* Prepend the root directory where that's necessary */ diff --git a/src/core/namespace.h b/src/core/namespace.h index 5e0ec97969..ab3983f790 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -52,6 +52,7 @@ struct NamespaceInfo { bool protect_kernel_tunables:1; bool protect_kernel_modules:1; bool mount_apivfs:1; + bool protect_hostname:1; }; struct BindMount { diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 9a8051d063..d4643403c7 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -740,7 +740,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", - "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality")) + "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality", "ProtectHostname")) return bus_append_parse_boolean(m, field, eq); diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index cc58b3c078..98a0066bae 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1762,3 +1762,53 @@ int seccomp_lock_personality(unsigned long personality) { return 0; }
+
+/* Builds and loads, for each architecture iterated by SECCOMP_FOREACH_LOCAL_ARCH(), a
+ * seccomp filter initialized with SCMP_ACT_ALLOW plus two rules that make sethostname()
+ * and setdomainname() return EPERM.
+ *
+ * Returns 0 on completion, or a negative value if seccomp_init_for_arch() fails or if
+ * seccomp_load() reports -EPERM/-EACCES. Any other failure to add a rule or to load the
+ * filter is logged at debug level and only skips the affected architecture. */
+int seccomp_protect_hostname(void) {
+        uint32_t arch;
+        int r;
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+                r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+                if (r < 0)
+                        return r;
+
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                SCMP_SYS(sethostname),
+                                0);
+                if (r < 0) {
+                        /* Do not skip silently: record why this architecture is left without the filter. */
+                        log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                SCMP_SYS(setdomainname),
+                                0);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                r = seccomp_load(seccomp);
+                /* Permission errors are returned to the caller; other load failures only skip this architecture. */
+                if (IN_SET(r, -EPERM, -EACCES))
+                        return r;
+                if (r < 0)
+                        log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index d8a36c4e21..477400237b 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -85,6 +85,7 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist); int seccomp_restrict_realtime(void); int seccomp_memory_deny_write_execute(void); int seccomp_lock_personality(unsigned long personality); +int seccomp_protect_hostname(void); extern const uint32_t seccomp_local_archs[];