execute: filter low-level I/O syscalls if PrivateDevices= is set

If device access is restricted via PrivateDevices=, let's also block the
various low-level I/O syscalls at the same time, so that we know that the
minimal set of devices in our virtualized /dev is really everything the unit
can access.
This commit is contained in:
Lennart Poettering 2016-08-26 16:39:04 +02:00 committed by Djalal Harouni
parent 1ecdba149b
commit ba128bb809
1 changed files with 64 additions and 1 deletions

View File

@ -1422,12 +1422,67 @@ finish:
return r;
}
/* Installs a seccomp filter that makes the low-level port and PCI I/O system calls fail with EPERM.
 * Every other system call stays allowed (the filter's default action is SCMP_ACT_ALLOW).
 *
 * u is only handed to skip_seccomp_unavailable(); c is only checked for being non-NULL.
 *
 * Returns 0 when skip_seccomp_unavailable() asks us to skip, -ENOMEM when the filter context cannot
 * be allocated, and otherwise the result of the last seccomp call made (negative on failure). */
static int apply_private_devices(Unit *u, const ExecContext *c) {
        /* System calls that are answered with EPERM. The s390 entries are only listed when the
         * respective __NR_* constant is defined at build time. */
        static const int device_syscalls[] = {
                SCMP_SYS(ioperm),
                SCMP_SYS(iopl),
                SCMP_SYS(pciconfig_iobase),
                SCMP_SYS(pciconfig_read),
                SCMP_SYS(pciconfig_write),
#ifdef __NR_s390_pci_mmio_read
                SCMP_SYS(s390_pci_mmio_read),
#endif
#ifdef __NR_s390_pci_mmio_write
                SCMP_SYS(s390_pci_mmio_write),
#endif
        };

        /* scmp_filter_ctx already is libseccomp's opaque handle type (a void pointer); seccomp_init()
         * returns it and seccomp_release() takes it by value, hence no additional '*' here. */
        scmp_filter_ctx seccomp;
        unsigned i;
        int r;

        assert(c);

        /* If PrivateDevices= is set, also turn off iopl and friends. */

        if (skip_seccomp_unavailable(u, "PrivateDevices="))
                return 0;

        seccomp = seccomp_init(SCMP_ACT_ALLOW);
        if (!seccomp)
                return -ENOMEM;

        r = seccomp_add_secondary_archs(seccomp);
        if (r < 0)
                goto finish;

        for (i = 0; i < ELEMENTSOF(device_syscalls); i++) {
                /* Unconditional rule: no argument comparisons, hence the trailing 0. */
                r = seccomp_rule_add(
                                seccomp,
                                SCMP_ACT_ERRNO(EPERM),
                                device_syscalls[i],
                                0);
                if (r < 0)
                        goto finish;
        }

        /* Ask libseccomp not to set no_new_privs on its own when the filter is loaded. */
        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0)
                goto finish;

        r = seccomp_load(seccomp);

finish:
        /* The context is released on both the success and the failure path. */
        seccomp_release(seccomp);
        return r;
}
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
assert(idle_pipe);
idle_pipe[1] = safe_close(idle_pipe[1]);
idle_pipe[2] = safe_close(idle_pipe[2]);
@ -2584,6 +2639,14 @@ static int exec_child(
}
}
if (context->private_devices) {
r = apply_private_devices(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
if (context_has_syscall_filters(context)) {
r = apply_seccomp(unit, context);
if (r < 0) {