diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index a17db8d850..3bd790b485 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1490,6 +1490,10 @@ RestrictNamespaces=~cgroup net @sync Synchronizing files and memory to disk: (fsync2, msync2, and related calls) + + @system-service + A reasonable set of system calls used by common system services, excluding any special-purpose calls. This is the recommended starting point for whitelisting system calls for system services, as it contains what is typically needed by system services, but excludes overly specific interfaces. For example, the following APIs are excluded: @clock, @mount, @swap, @reboot. + @timer System calls for scheduling operations by time (alarm2, timer_create2, …) @@ -1504,6 +1508,14 @@ RestrictNamespaces=~cgroup net systemd-analyze syscall-filter to list the actual list of system calls in each filter. + Generally, whitelisting system calls (rather than blacklisting) is the safer mode of operation. It is + recommended to enforce system call whitelists for all long-running system services. Specifically, the + following lines are a relatively safe basic choice for the majority of system services: + + [Service] +SystemCallFilter=@system-service +SystemCallErrorNumber=EPERM + It is recommended to combine the file system namespacing related options with SystemCallFilter=~@mount, in order to prohibit the unit's processes to undo the mappings. 
Specifically these are the options PrivateTmp=, diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 517a1b4509..4a02d8c35f 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -756,6 +756,75 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "sync_file_range\0" "syncfs\0" }, + [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = { + .name = "@system-service", + .help = "General system service operations", + .value = + "@aio\0" + "@basic-io\0" + "@chown\0" + "@default\0" + "@file-system\0" + "@io-event\0" + "@ipc\0" + "@keyring\0" + "@memlock\0" + "@network-io\0" + "@process\0" + "@resources\0" + "@setuid\0" + "@signal\0" + "@sync\0" + "@timer\0" + "brk\0" + "capget\0" + "capset\0" + "copy_file_range\0" + "fadvise64\0" + "fadvise64_64\0" + "flock\0" + "get_mempolicy\0" + "getcpu\0" + "getpriority\0" + "getrandom\0" + "ioctl\0" + "ioprio_get\0" + "kcmp\0" + "madvise\0" + "mincore\0" + "mprotect\0" + "mremap\0" + "name_to_handle_at\0" + "oldolduname\0" + "olduname\0" + "personality\0" + "readahead\0" + "readdir\0" + "remap_file_pages\0" + "sched_get_priority_max\0" + "sched_get_priority_min\0" + "sched_getaffinity\0" + "sched_getattr\0" + "sched_getparam\0" + "sched_getscheduler\0" + "sched_rr_get_interval\0" + "sched_yield\0" + "sendfile\0" + "sendfile64\0" + "setfsgid\0" + "setfsgid32\0" + "setfsuid\0" + "setfsuid32\0" + "setpgid\0" + "setsid\0" + "splice\0" + "sysinfo\0" + "tee\0" + "umask\0" + "uname\0" + "userfaultfd\0" + "vmsplice\0" + }, [SYSCALL_FILTER_SET_TIMER] = { .name = "@timer", .help = "Schedule operations by time", diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 7dfff9df78..eac857afb9 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -47,6 +47,7 @@ enum { SYSCALL_FILTER_SET_SIGNAL, SYSCALL_FILTER_SET_SWAP, SYSCALL_FILTER_SET_SYNC, + SYSCALL_FILTER_SET_SYSTEM_SERVICE, SYSCALL_FILTER_SET_TIMER, _SYSCALL_FILTER_SET_MAX }; diff --git 
a/src/test/test-seccomp.c b/src/test/test-seccomp.c index 33ec680753..d82cb5c1c5 100644 --- a/src/test/test-seccomp.c +++ b/src/test/test-seccomp.c @@ -104,7 +104,8 @@ static void test_filter_sets(void) { if (pid == 0) { /* Child? */ int fd; - if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */ + /* if we look at the default set (or one that includes it), whitelist instead of blacklist */ + if (IN_SET(i, SYSCALL_FILTER_SET_DEFAULT, SYSCALL_FILTER_SET_SYSTEM_SERVICE)) r = seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN), syscall_filter_sets + i, SCMP_ACT_ALLOW); else r = seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EUCLEAN));