core: add new RestrictAddressFamilies= switch

This new unit setting allows restricting which address families are
available to processes. This is an effective way to minimize the attack
surface of services, by turning off entire network stacks for them.

This is based on seccomp, and does not work on x86-32, since seccomp
cannot filter socketcall() syscalls on that platform.
This commit is contained in:
Lennart Poettering 2014-02-25 20:37:03 +01:00
parent 9875fd7875
commit 4298d0b512
13 changed files with 441 additions and 9 deletions

View file

@ -764,6 +764,8 @@ libsystemd_shared_la_SOURCES = \
src/shared/net-util.h \
src/shared/errno-list.c \
src/shared/errno-list.h \
src/shared/af-list.c \
src/shared/af-list.h \
src/shared/audit.c \
src/shared/audit.h \
src/shared/xml.c \
@ -775,7 +777,9 @@ libsystemd_shared_la_SOURCES = \
nodist_libsystemd_shared_la_SOURCES = \
src/shared/errno-from-name.h \
src/shared/errno-to-name.h
src/shared/errno-to-name.h \
src/shared/af-from-name.h \
src/shared/af-to-name.h
libsystemd_shared_la_CFLAGS = \
$(AM_CFLAGS) \
@ -1059,11 +1063,15 @@ CLEANFILES += \
src/core/load-fragment-gperf.c \
src/core/load-fragment-gperf-nulstr.c \
src/shared/errno-list.txt \
src/shared/errno-from-name.gperf
src/shared/errno-from-name.gperf \
src/shared/af-list.txt \
src/shared/af-from-name.gperf
BUILT_SOURCES += \
src/shared/errno-from-name.h \
src/shared/errno-to-name.h
src/shared/errno-to-name.h \
src/shared/af-from-name.h \
src/shared/af-to-name.h
src/shared/errno-list.txt:
$(AM_V_at)$(MKDIR_P) $(dir $@)
@ -1081,6 +1089,22 @@ src/shared/errno-to-name.h: src/shared/errno-list.txt
$(AM_V_at)$(MKDIR_P) $(dir $@)
$(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const errno_names[] = { "} { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
# Generate the list of address family names: dump all preprocessor macros
# visible after including sys/socket.h, drop AF_UNSPEC and AF_MAX, and keep
# the name of every macro of the form "#define AF_xxx PF_xxx".
src/shared/af-list.txt:
$(AM_V_at)$(MKDIR_P) $(dir $@)
$(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include sys/socket.h - < /dev/null | grep -v AF_UNSPEC | grep -v AF_MAX | $(AWK) '/^#define[ \t]+AF_[^ \t]+[ \t]+PF_[^ \t]/ { print $$2; }' > $@
# Turn the family list into gperf input: a "struct af_name" declaration
# followed by one "NAME, NAME" keyword line per address family.
src/shared/af-from-name.gperf: src/shared/af-list.txt
$(AM_V_at)$(MKDIR_P) $(dir $@)
$(AM_V_GEN)$(AWK) 'BEGIN{ print "struct af_name { const char* name; int id; };"; print "%null-strings"; print "%%";} { printf "%s, %s\n", $$1, $$1 }' < $< > $@
# Run gperf on the keyword list to produce the case-insensitive lookup
# function lookup_af() (hash function hash_af_name()).
src/shared/af-from-name.h: src/shared/af-from-name.gperf
$(AM_V_at)$(MKDIR_P) $(dir $@)
$(AM_V_GPERF)$(GPERF) -L ANSI-C -t --ignore-case -N lookup_af -H hash_af_name -p -C < $< > $@
# Generate the af_names[] table, indexed by address family number via
# designated initializers. AF_FILE, AF_ROUTE and AF_LOCAL are left out
# (presumably because they alias other families and would yield duplicate
# initializers -- TODO confirm).
src/shared/af-to-name.h: src/shared/af-list.txt
$(AM_V_at)$(MKDIR_P) $(dir $@)
$(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const af_names[] = { "} !/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
# ------------------------------------------------------------------------------
systemd_SOURCES = \
src/core/main.c

View file

@ -1121,6 +1121,55 @@
applied.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RestrictAddressFamilies=</varname></term>
<listitem><para>Restricts the set of
socket address families accessible to
the processes of this unit. Takes a
space-separated list of address family
names to whitelist, such as
<constant>AF_UNIX</constant>,
<constant>AF_INET</constant> or
<constant>AF_INET6</constant>. When
prefixed with <constant>~</constant>
the listed address families will be
applied as blacklist, otherwise as
whitelist. Note that this restricts
access to the
<citerefentry><refentrytitle>socket</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system call only. Sockets passed into
the process by other means (for
example, by using socket activation
with socket units, see
<citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>)
are unaffected. Also, sockets created
with <function>socketpair()</function>
(which creates connected AF_UNIX
sockets only) are unaffected. Note
that this option has no effect on
32bit x86 and is ignored (but works
correctly on x86-64). By default no
restriction applies, all address
families are accessible to
processes. If assigned the empty
string any previous list changes are
undone.</para>
<para>Use this option to limit
exposure of processes to remote
systems, in particular via exotic
network protocols. Note that in most
cases the local
<constant>AF_UNIX</constant> address
family should be included in the
configured whitelist as it is
frequently used for local
communication, including for
<citerefentry><refentrytitle>syslog</refentrytitle><manvolnum>3</manvolnum></citerefentry>
logging.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>Personality=</varname></term>
@ -1138,6 +1187,7 @@
host system's
kernel.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View file

@ -34,6 +34,7 @@
#include "dbus-execute.h"
#include "capability.h"
#include "env-util.h"
#include "af-list.h"
#ifdef HAVE_SECCOMP
#include "seccomp-util.h"
@ -518,6 +519,54 @@ static int property_get_personality(
return sd_bus_message_append(reply, "s", personality_to_string(c->personality));
}
/* D-Bus getter for the "RestrictAddressFamilies" property.
 *
 * Serializes the setting as a struct "(bas)": the whitelist flag of the
 * ExecContext passed in userdata, followed by the sorted names of all
 * address families stored in its set. Families for which af_to_name()
 * yields no name are left out.
 *
 * Returns the result of closing the container on success, a negative
 * value if any message operation fails, or -ENOMEM if the name list
 * cannot be extended. */
static int property_get_address_families(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        ExecContext *context = userdata;
        _cleanup_strv_free_ char **names = NULL;
        Iterator it;
        void *entry;
        int r;

        assert(bus);
        assert(reply);
        assert(context);

        r = sd_bus_message_open_container(reply, 'r', "bas");
        if (r < 0)
                return r;

        /* First struct member: true if the list is a whitelist. */
        r = sd_bus_message_append(reply, "b", context->address_families_whitelist);
        if (r < 0)
                return r;

        /* Translate the numeric families into names, skipping unnamed ones. */
        SET_FOREACH(entry, context->address_families, it) {
                const char *name = af_to_name(PTR_TO_INT(entry));

                if (name && strv_extend(&names, name) < 0)
                        return -ENOMEM;
        }

        /* Sort so the reply does not depend on set iteration order. */
        strv_sort(names);

        r = sd_bus_message_append_strv(reply, names);
        if (r < 0)
                return r;

        return sd_bus_message_close_container(reply);
}
const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@ -585,6 +634,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};

View file

@ -81,6 +81,7 @@
#include "async.h"
#include "selinux-util.h"
#include "errno-list.h"
#include "af-list.h"
#include "apparmor-util.h"
#ifdef HAVE_SECCOMP
@ -994,9 +995,130 @@ static int apply_seccomp(ExecContext *c) {
finish:
seccomp_release(seccomp);
return r;
}
/* Builds and loads a seccomp filter that restricts the address family
 * (first argument) that socket() may be called with, according to
 * c->address_families and c->address_families_whitelist.
 *
 * The filter allows everything by default; denied families make socket()
 * fail with EPROTONOSUPPORT.
 *
 * Returns -ENOMEM if the filter context cannot be allocated, the negative
 * result of the first failing libseccomp call otherwise, or the result of
 * seccomp_load(). The filter context is always released before returning. */
static int apply_address_families(ExecContext *c) {
        scmp_filter_ctx *seccomp;
        Iterator i;
        int r;

        assert(c);

        seccomp = seccomp_init(SCMP_ACT_ALLOW);
        if (!seccomp)
                return -ENOMEM;

        r = seccomp_add_secondary_archs(seccomp);
        if (r < 0)
                goto finish;

        if (c->address_families_whitelist) {
                int af, first = 0, last = 0;
                void *afp;

                /* If this is a whitelist, we first block the address
                 * families that are out of range and then everything
                 * that is not in the set. First, we find the lowest
                 * and highest address family in the set. */

                SET_FOREACH(afp, c->address_families, i) {
                        af = PTR_TO_INT(afp);

                        /* Ignore entries outside the range of known families. */
                        if (af <= 0 || af >= af_max())
                                continue;

                        if (first == 0 || af < first)
                                first = af;

                        if (last == 0 || af > last)
                                last = af;
                }

                assert((first == 0) == (last == 0));

                if (first == 0) {

                        /* No entries in the valid range, block everything */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        0);
                        if (r < 0)
                                goto finish;

                } else {

                        /* Block everything below the first entry */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_LT, first));
                        if (r < 0)
                                goto finish;

                        /* Block everything above the last entry */
                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_GT, last));
                        if (r < 0)
                                goto finish;

                        /* Block every family strictly between the first and
                         * last entry that is not in the set. Values below
                         * 'first' and above 'last' are already denied by the
                         * two range rules above, and 'first' and 'last'
                         * themselves are in the set, so only the open
                         * interval needs per-family rules. */
                        for (af = first + 1; af < last; af++) {

                                if (set_contains(c->address_families, INT_TO_PTR(af)))
                                        continue;

                                r = seccomp_rule_add(
                                                seccomp,
                                                SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                                SCMP_SYS(socket),
                                                1,
                                                SCMP_A0(SCMP_CMP_EQ, af));
                                if (r < 0)
                                        goto finish;
                        }
                }

        } else {
                void *afp;

                /* If this is a blacklist, then generate one rule for
                 * each address family; the rules are then combined in
                 * OR checks. */

                SET_FOREACH(afp, c->address_families, i) {

                        r = seccomp_rule_add(
                                        seccomp,
                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
                                        SCMP_SYS(socket),
                                        1,
                                        SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(afp)));
                        if (r < 0)
                                goto finish;
                }
        }

        /* Clear the filter's NNP control attribute before loading it. */
        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0)
                goto finish;

        r = seccomp_load(seccomp);

finish:
        seccomp_release(seccomp);
        return r;
}
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@ -1584,6 +1706,14 @@ int exec_spawn(ExecCommand *command,
}
#ifdef HAVE_SECCOMP
if (context->address_families) {
err = apply_address_families(context);
if (err < 0) {
r = EXIT_ADDRESS_FAMILIES;
goto fail_child;
}
}
if (context->syscall_filter || context->syscall_archs) {
err = apply_seccomp(context);
if (err < 0) {
@ -1777,13 +1907,14 @@ void exec_context_done(ExecContext *c) {
free(c->apparmor_profile);
c->apparmor_profile = NULL;
#ifdef HAVE_SECCOMP
set_free(c->syscall_filter);
c->syscall_filter = NULL;
set_free(c->syscall_archs);
c->syscall_archs = NULL;
#endif
set_free(c->address_families);
c->address_families = NULL;
}
void exec_command_done(ExecCommand *c) {

View file

@ -178,6 +178,9 @@ struct ExecContext {
int syscall_errno;
bool syscall_whitelist:1;
Set *address_families;
bool address_families_whitelist:1;
bool oom_score_adjust_set:1;
bool nice_set:1;
bool ioprio_set:1;

View file

@ -52,10 +52,12 @@ $1.NoNewPrivileges, config_parse_bool, 0,
m4_ifdef(`HAVE_SECCOMP',
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)',
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, 0, 0
$1.SystemCallArchitectures, config_parse_warn_compat, 0, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0')
$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, 0, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)

View file

@ -56,6 +56,7 @@
#include "bus-util.h"
#include "bus-error.h"
#include "errno-list.h"
#include "af-list.h"
#ifdef HAVE_SECCOMP
#include "seccomp-util.h"
@ -2216,6 +2217,81 @@ int config_parse_syscall_errno(
c->syscall_errno = e;
return 0;
}
/* Config parser for RestrictAddressFamilies=.
 *
 * 'data' is the ExecContext to update. An empty value drops the stored set
 * and clears the whitelist flag. Otherwise the value is a list of address
 * family names, optionally prefixed with '~' to invert its meaning. The
 * first non-empty assignment creates the set and fixes whether it is a
 * whitelist (no '~') or a blacklist ('~'). Afterwards, names with the same
 * polarity as the set are added and names with the opposite polarity are
 * removed.
 *
 * Unknown names are logged and skipped. Returns 0 on success or the result
 * of log_oom() if an allocation fails. */
int config_parse_address_families(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        ExecContext *c = data;
        Unit *owner = userdata;
        bool invert;
        char *w, *state;
        size_t l;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(owner);

        if (isempty(rvalue)) {
                /* Empty assignment resets the list */
                set_free(c->address_families);
                c->address_families = NULL;
                c->address_families_whitelist = false;
                return 0;
        }

        /* A leading '~' inverts the meaning of the names that follow. */
        invert = rvalue[0] == '~';
        if (invert)
                rvalue++;

        if (!c->address_families) {
                c->address_families = set_new(trivial_hash_func, trivial_compare_func);
                if (!c->address_families)
                        return log_oom();

                /* The first assignment decides the polarity of the list. */
                c->address_families_whitelist = !invert;
        }

        FOREACH_WORD_QUOTED(w, l, rvalue, state) {
                _cleanup_free_ char *t = NULL;
                int af;

                t = strndup(w, l);
                if (!t)
                        return log_oom();

                af = af_from_name(t);
                if (af <= 0) {
                        log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse address family, ignoring: %s", t);
                        continue;
                }

                if (invert == c->address_families_whitelist) {
                        /* This entry has the opposite polarity of the stored
                         * list, so it cancels an earlier entry: take the
                         * family out of the set. */
                        set_remove(c->address_families, INT_TO_PTR(af));
                        continue;
                }

                /* Same polarity as the list: add it, tolerating duplicates. */
                r = set_put(c->address_families, INT_TO_PTR(af));
                if (r < 0 && r != -EEXIST)
                        return log_oom();
        }

        return 0;
}
#endif
int config_parse_unit_slice(
@ -3024,6 +3100,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_syscall_filter, "SYSCALLS" },
{ config_parse_syscall_archs, "ARCHS" },
{ config_parse_syscall_errno, "ERRNO" },
{ config_parse_address_families, "FAMILIES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
{ config_parse_memory_limit, "LIMIT" },
@ -3039,6 +3116,7 @@ void unit_dump_config_items(FILE *f) {
#endif
{ config_parse_job_mode, "MODE" },
{ config_parse_job_mode_isolate, "BOOLEAN" },
{ config_parse_personality, "PERSONALITY" },
};
const char *prev = NULL;

View file

@ -90,6 +90,7 @@ int config_parse_job_mode_isolate(const char *unit, const char *filename, unsign
int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_apparmor_profile(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_address_families(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);

View file

@ -2,3 +2,7 @@
/errno-from-name.h
/errno-list.txt
/errno-to-name.h
/af-from-name.gperf
/af-from-name.h
/af-list.txt
/af-to-name.h

58
src/shared/af-list.c Normal file
View file

@ -0,0 +1,58 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2013 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/socket.h>
#include <string.h>
#include "util.h"
#include "af-list.h"
static const struct af_name* lookup_af(register const char *str, register unsigned int len);
#include "af-to-name.h"
#include "af-from-name.h"
/* Maps an address family number to its name from the generated af_names[]
 * table. Returns NULL for id <= 0, for ids beyond the end of the table, and
 * for table slots that have no entry. */
const char *af_to_name(int id) {
        if (id > 0 && id < (int) ELEMENTSOF(af_names))
                return af_names[id];

        return NULL;
}
/* Maps an address family name to its number using the gperf-generated
 * lookup_af(). Returns AF_UNSPEC if the name is not found. 'name' must not
 * be NULL. */
int af_from_name(const char *name) {
        const struct af_name *entry;

        assert(name);

        entry = lookup_af(name, strlen(name));

        return entry ? entry->id : AF_UNSPEC;
}
/* Returns the number of slots in the generated af_names[] table, i.e. the
 * exclusive upper bound for address family numbers that af_to_name() can
 * resolve. */
int af_max(void) {
        return (int) ELEMENTSOF(af_names);
}

27
src/shared/af-list.h Normal file
View file

@ -0,0 +1,27 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
/* Returns the name of address family 'id', or NULL if 'id' is <= 0 or has no
 * entry in the generated name table. */
const char *af_to_name(int id);
/* Returns the address family number for 'name', or AF_UNSPEC if the name is
 * not known. 'name' must not be NULL. */
int af_from_name(const char *name);
/* Returns the size of the generated name table; valid family numbers for
 * af_to_name() are below this value. */
int af_max(void);

View file

@ -139,6 +139,9 @@ const char* exit_status_to_string(ExitStatus status, ExitStatusLevel level) {
case EXIT_APPARMOR_PROFILE:
return "APPARMOR";
case EXIT_ADDRESS_FAMILIES:
return "ADDRESS_FAMILIES";
}
}

View file

@ -70,7 +70,8 @@ typedef enum ExitStatus {
EXIT_SECCOMP,
EXIT_SELINUX_CONTEXT,
EXIT_PERSONALITY, /* 230 */
EXIT_APPARMOR_PROFILE
EXIT_APPARMOR_PROFILE,
EXIT_ADDRESS_FAMILIES,
} ExitStatus;
typedef enum ExitStatusLevel {