8d7b0c8fd7
This adds a new seccomp_init_conservative() helper call that is mostly just a wrapper around seccomp_init(), but turns off NNP and adds in all secondary archs, for best compatibility with everything else. Pretty much all of our code used the very same constructs for these three steps, hence unifying this in one small function makes things a lot shorter. This also changes incorrect usage of the "scmp_filter_ctx" type at various places. libseccomp defines it as typedef to "void*", i.e. it is a pointer type (pretty poor choice already!) that casts implicitly to and from all other pointer types (even poorer choice: you defined a confusing type now, and don't even gain any bit of type safety through it...). A lot of the code assumed the type would refer to a structure, and hence aded additional "*" here and there. Remove that.
186 lines
8.1 KiB
C
186 lines
8.1 KiB
C
/***
|
|
This file is part of systemd.
|
|
|
|
Copyright 2016 Lennart Poettering
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
|
(at your option) any later version.
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
***/
|
|
|
|
#include <errno.h>
|
|
#include <linux/netlink.h>
|
|
#include <sys/capability.h>
|
|
#include <sys/types.h>
|
|
|
|
#ifdef HAVE_SECCOMP
|
|
#include <seccomp.h>
|
|
#endif
|
|
|
|
#include "log.h"
|
|
|
|
#ifdef HAVE_SECCOMP
|
|
#include "seccomp-util.h"
|
|
#endif
|
|
|
|
#include "nspawn-seccomp.h"
|
|
|
|
#ifdef HAVE_SECCOMP
|
|
|
|
static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx,
|
|
uint64_t cap_list_retain) {
|
|
unsigned i;
|
|
int r;
|
|
static const struct {
|
|
uint64_t capability;
|
|
int syscall_num;
|
|
} blacklist[] = {
|
|
{ 0, SCMP_SYS(_sysctl) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(add_key) }, /* keyring is not namespaced */
|
|
{ 0, SCMP_SYS(afs_syscall) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(bdflush) },
|
|
#ifdef __NR_bpf
|
|
{ 0, SCMP_SYS(bpf) },
|
|
#endif
|
|
{ 0, SCMP_SYS(break) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(create_module) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(ftime) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(get_kernel_syms) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(getpmsg) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(gtty) }, /* obsolete syscall */
|
|
#ifdef __NR_kexec_file_load
|
|
{ 0, SCMP_SYS(kexec_file_load) },
|
|
#endif
|
|
{ 0, SCMP_SYS(kexec_load) },
|
|
{ 0, SCMP_SYS(keyctl) }, /* keyring is not namespaced */
|
|
{ 0, SCMP_SYS(lock) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(lookup_dcookie) },
|
|
{ 0, SCMP_SYS(mpx) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(nfsservctl) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(open_by_handle_at) },
|
|
{ 0, SCMP_SYS(perf_event_open) },
|
|
{ 0, SCMP_SYS(prof) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(profil) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(putpmsg) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(query_module) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(quotactl) },
|
|
{ 0, SCMP_SYS(request_key) }, /* keyring is not namespaced */
|
|
{ 0, SCMP_SYS(security) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(sgetmask) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(ssetmask) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(stty) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(swapoff) },
|
|
{ 0, SCMP_SYS(swapon) },
|
|
{ 0, SCMP_SYS(sysfs) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(tuxcall) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(ulimit) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(uselib) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(ustat) }, /* obsolete syscall */
|
|
{ 0, SCMP_SYS(vserver) }, /* obsolete syscall */
|
|
{ CAP_SYSLOG, SCMP_SYS(syslog) },
|
|
{ CAP_SYS_MODULE, SCMP_SYS(delete_module) },
|
|
{ CAP_SYS_MODULE, SCMP_SYS(finit_module) },
|
|
{ CAP_SYS_MODULE, SCMP_SYS(init_module) },
|
|
{ CAP_SYS_PACCT, SCMP_SYS(acct) },
|
|
{ CAP_SYS_PTRACE, SCMP_SYS(process_vm_readv) },
|
|
{ CAP_SYS_PTRACE, SCMP_SYS(process_vm_writev) },
|
|
{ CAP_SYS_PTRACE, SCMP_SYS(ptrace) },
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(ioperm) },
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(iopl) },
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(pciconfig_iobase) },
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(pciconfig_read) },
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(pciconfig_write) },
|
|
#ifdef __NR_s390_pci_mmio_read
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_read) },
|
|
#endif
|
|
#ifdef __NR_s390_pci_mmio_write
|
|
{ CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_write) },
|
|
#endif
|
|
{ CAP_SYS_TIME, SCMP_SYS(adjtimex) },
|
|
{ CAP_SYS_TIME, SCMP_SYS(clock_adjtime) },
|
|
{ CAP_SYS_TIME, SCMP_SYS(clock_settime) },
|
|
{ CAP_SYS_TIME, SCMP_SYS(settimeofday) },
|
|
{ CAP_SYS_TIME, SCMP_SYS(stime) },
|
|
};
|
|
|
|
for (i = 0; i < ELEMENTSOF(blacklist); i++) {
|
|
if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability)))
|
|
continue;
|
|
|
|
r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0);
|
|
if (r == -EFAULT)
|
|
continue; /* unknown syscall */
|
|
if (r < 0)
|
|
return log_error_errno(r, "Failed to block syscall: %m");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int setup_seccomp(uint64_t cap_list_retain) {
|
|
scmp_filter_ctx seccomp;
|
|
int r;
|
|
|
|
if (!is_seccomp_available()) {
|
|
log_debug("SECCOMP features not detected in the kernel, disabling SECCOMP audit filter");
|
|
return 0;
|
|
}
|
|
|
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
|
if (r < 0)
|
|
return log_error_errno(r, "Failed to allocate seccomp object: %m");
|
|
|
|
r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain);
|
|
if (r < 0)
|
|
goto finish;
|
|
|
|
/*
|
|
Audit is broken in containers, much of the userspace audit
|
|
hookup will fail if running inside a container. We don't
|
|
care and just turn off creation of audit sockets.
|
|
|
|
This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
|
|
with EAFNOSUPPORT which audit userspace uses as indication
|
|
that audit is disabled in the kernel.
|
|
*/
|
|
|
|
r = seccomp_rule_add(
|
|
seccomp,
|
|
SCMP_ACT_ERRNO(EAFNOSUPPORT),
|
|
SCMP_SYS(socket),
|
|
2,
|
|
SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
|
|
SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
|
|
if (r < 0) {
|
|
log_error_errno(r, "Failed to add audit seccomp rule: %m");
|
|
goto finish;
|
|
}
|
|
|
|
r = seccomp_load(seccomp);
|
|
if (r < 0) {
|
|
log_error_errno(r, "Failed to install seccomp audit filter: %m");
|
|
goto finish;
|
|
}
|
|
|
|
finish:
|
|
seccomp_release(seccomp);
|
|
return r;
|
|
}
|
|
|
|
#else
|
|
|
|
int setup_seccomp(uint64_t cap_list_retain) {
|
|
return 0;
|
|
}
|
|
|
|
#endif
|