diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index f19e7f6ee9..f70e5c36d4 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -933,8 +933,10 @@ /dev/random (as well as the pseudo TTY subsystem) to it, but no physical devices such as /dev/sda, system memory /dev/mem, system ports /dev/port and others. This is useful to securely turn off physical device access by the - executed process. Defaults to false. Enabling this option will also remove CAP_MKNOD from - the capability bounding set for the unit (see above), and set DevicePolicy=closed (see + executed process. Defaults to false. Enabling this option will install a system call filter to block low-level + I/O system calls that are grouped in the @raw-io set, will also remove + CAP_MKNOD from the capability bounding set for the unit (see above), and set + DevicePolicy=closed (see systemd.resource-control5 for details). Note that using this setting will disconnect propagation of mounts from the service to the host (propagation in the opposite direction continues to work). This means that this setting may not be used for diff --git a/src/core/execute.c b/src/core/execute.c index 0488ba2ca9..3da7ef3be6 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1429,28 +1429,15 @@ finish: } static int apply_private_devices(Unit *u, const ExecContext *c) { - - static const int device_syscalls[] = { - SCMP_SYS(ioperm), - SCMP_SYS(iopl), - SCMP_SYS(pciconfig_iobase), - SCMP_SYS(pciconfig_read), - SCMP_SYS(pciconfig_write), -#ifdef __NR_s390_pci_mmio_read - SCMP_SYS(s390_pci_mmio_read), -#endif -#ifdef __NR_s390_pci_mmio_write - SCMP_SYS(s390_pci_mmio_write), -#endif - }; - + const SystemCallFilterSet *set; scmp_filter_ctx *seccomp; - unsigned i; + const char *sys; + bool syscalls_found = false; int r; assert(c); - /* If PrivateDevices= is set, also turn off iopl and friends. */ + /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; @@ -1463,12 +1450,40 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - for (i = 0; i < ELEMENTSOF(device_syscalls); i++) { + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, "@raw-io")) { + syscalls_found = true; + break; + } + + /* We should never fail here */ + if (!syscalls_found) { + r = -EOPNOTSUPP; + goto finish; + } + + NULSTR_FOREACH(sys, set->value) { + int id; + bool add = true; + +#ifndef __NR_s390_pci_mmio_read + if (streq(sys, "s390_pci_mmio_read")) + add = false; +#endif +#ifndef __NR_s390_pci_mmio_write + if (streq(sys, "s390_pci_mmio_write")) + add = false; +#endif + + if (!add) + continue; + + id = seccomp_syscall_resolve_name(sys); + r = seccomp_rule_add( seccomp, SCMP_ACT_ERRNO(EPERM), - device_syscalls[i], - 0); + id, 0); if (r < 0) goto finish; }