Merge pull request #6818 from poettering/nspawn-whitelist

convert nspawn syscall blacklist into a whitelist (and related stuff)
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2017-09-14 19:47:59 +02:00 committed by GitHub
commit 8b5c528ce8
7 changed files with 359 additions and 83 deletions

View file

@ -723,9 +723,9 @@
system calls will be permitted. The list may optionally be prefixed by <literal>~</literal>, in which case all system calls will be permitted. The list may optionally be prefixed by <literal>~</literal>, in which case all
listed system calls are prohibited. If this command line option is used multiple times the configured lists are listed system calls are prohibited. If this command line option is used multiple times the configured lists are
combined. If both a positive and a negative list (that is one system call list without and one with the combined. If both a positive and a negative list (that is one system call list without and one with the
<literal>~</literal> prefix) are configured, the positive list takes precedence over the negative list. Note <literal>~</literal> prefix) are configured, the negative list takes precedence over the positive list. Note
that <command>systemd-nspawn</command> always implements a system call blacklist (as opposed to a whitelist), that <command>systemd-nspawn</command> always implements a system call whitelist (as opposed to a blacklist),
and this command line option hence adds or removes entries from the default blacklist, depending on the and this command line option hence adds or removes entries from the default whitelist, depending on the
<literal>~</literal> prefix. Note that the applied system call filter is also altered implicitly if additional <literal>~</literal> prefix. Note that the applied system call filter is also altered implicitly if additional
capabilities are passed using the <command>--capabilities=</command> option.</para></listitem> capabilities are passed using the <command>--capabilities=</command> option.</para></listitem>
</varlistentry> </varlistentry>

View file

@ -239,7 +239,7 @@
<command>systemd-run</command> command itself. This allows <command>systemd-run</command> <command>systemd-run</command> command itself. This allows <command>systemd-run</command>
to be used within shell pipelines. to be used within shell pipelines.
Note that this mode is not suitable for interactive command shells and similar, as the Note that this mode is not suitable for interactive command shells and similar, as the
service process will become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead service process will not become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead
in that case.</para> in that case.</para>
<para>When both <option>--pipe</option> and <option>--pty</option> are used in combination the more appropriate <para>When both <option>--pipe</option> and <option>--pty</option> are used in combination the more appropriate

View file

@ -1485,6 +1485,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@cpu-emulation</entry> <entry>@cpu-emulation</entry>
<entry>System calls for CPU emulation functionality (<citerefentry project='man-pages'><refentrytitle>vm86</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry> <entry>System calls for CPU emulation functionality (<citerefentry project='man-pages'><refentrytitle>vm86</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row> </row>
<row>
<entry>@credentials</entry>
<entry>System calls for querying process credentials (<citerefentry project='man-pages'><refentrytitle>getuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>capget</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
</row>
<row> <row>
<entry>@debug</entry> <entry>@debug</entry>
<entry>Debugging, performance monitoring and tracing functionality (<citerefentry project='man-pages'><refentrytitle>ptrace</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>perf_event_open</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry> <entry>Debugging, performance monitoring and tracing functionality (<citerefentry project='man-pages'><refentrytitle>ptrace</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>perf_event_open</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
@ -1505,6 +1509,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@keyring</entry> <entry>@keyring</entry>
<entry>Kernel keyring access (<citerefentry project='man-pages'><refentrytitle>keyctl</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry> <entry>Kernel keyring access (<citerefentry project='man-pages'><refentrytitle>keyctl</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row> </row>
<row>
<entry>@memlock</entry>
<entry>Locking of memory into RAM (<citerefentry project='man-pages'><refentrytitle>mlock</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>mlockall</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row>
<row> <row>
<entry>@module</entry> <entry>@module</entry>
<entry>Loading and unloading of kernel modules (<citerefentry project='man-pages'><refentrytitle>init_module</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>delete_module</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry> <entry>Loading and unloading of kernel modules (<citerefentry project='man-pages'><refentrytitle>init_module</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>delete_module</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
@ -1545,10 +1553,18 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@setuid</entry> <entry>@setuid</entry>
<entry>System calls for changing user ID and group ID credentials (<citerefentry project='man-pages'><refentrytitle>setuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setgid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setresuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry> <entry>System calls for changing user ID and group ID credentials (<citerefentry project='man-pages'><refentrytitle>setuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setgid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setresuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row> </row>
<row>
<entry>@signal</entry>
<entry>System calls for manipulating and handling process signals (<citerefentry project='man-pages'><refentrytitle>signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>sigprocmask</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
<row> <row>
<entry>@swap</entry> <entry>@swap</entry>
<entry>System calls for enabling/disabling swap devices (<citerefentry project='man-pages'><refentrytitle>swapon</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>swapoff</refentrytitle><manvolnum>2</manvolnum></citerefentry>)</entry> <entry>System calls for enabling/disabling swap devices (<citerefentry project='man-pages'><refentrytitle>swapon</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>swapoff</refentrytitle><manvolnum>2</manvolnum></citerefentry>)</entry>
</row> </row>
<row>
<entry>@timer</entry>
<entry>System calls for scheduling operations by time (<citerefentry project='man-pages'><refentrytitle>alarm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>timer_create</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>

View file

@ -47,47 +47,154 @@ static int seccomp_add_default_syscall_filter(
static const struct { static const struct {
uint64_t capability; uint64_t capability;
const char* name; const char* name;
} blacklist[] = { } whitelist[] = {
{ 0, "@obsolete" }, /* Let's use set names where we can */
{ 0, "@keyring" }, /* keyring is not namespaced */ { 0, "@basic-io" },
{ 0, "bpf" }, { 0, "@credentials" },
{ 0, "kexec_file_load" }, { 0, "@default" },
{ 0, "kexec_load" }, { 0, "@file-system" },
{ 0, "lookup_dcookie" }, { 0, "@io-event" },
{ 0, "open_by_handle_at" }, { 0, "@ipc" },
{ 0, "perf_event_open" }, { 0, "@mount" },
{ 0, "quotactl" }, { 0, "@network-io" },
{ 0, "@swap" }, { 0, "@process" },
{ CAP_SYSLOG, "syslog" }, { 0, "@resources" },
{ CAP_SYS_MODULE, "@module" }, { 0, "@setuid" },
{ CAP_SYS_PACCT, "acct" }, { 0, "@signal" },
{ CAP_SYS_PTRACE, "process_vm_readv" }, { 0, "@timer" },
{ CAP_SYS_PTRACE, "process_vm_writev" },
{ CAP_SYS_PTRACE, "ptrace" }, /* The following four are sets we optionally enable, in case the caps have been configured for it */
{ CAP_SYS_RAWIO, "@raw-io" }, { CAP_SYS_TIME, "@clock" },
{ CAP_SYS_TIME, "@clock" }, { CAP_SYS_MODULE, "@module" },
{ CAP_SYS_RAWIO, "@raw-io" },
{ CAP_IPC_LOCK, "@memlock" },
/* Plus a good set of additional syscalls which are not part of any of the groups above */
{ 0, "brk" },
{ 0, "capset" },
{ 0, "chown" },
{ 0, "chown32" },
{ 0, "copy_file_range" },
{ 0, "fadvise64" },
{ 0, "fadvise64_64" },
{ 0, "fchown" },
{ 0, "fchown32" },
{ 0, "fchownat" },
{ 0, "fdatasync" },
{ 0, "flock" },
{ 0, "fsync" },
{ 0, "get_mempolicy" },
{ 0, "getcpu" },
{ 0, "getpriority" },
{ 0, "getrandom" },
{ 0, "io_cancel" },
{ 0, "io_destroy" },
{ 0, "io_getevents" },
{ 0, "io_setup" },
{ 0, "io_submit" },
{ 0, "ioctl" },
{ 0, "ioprio_get" },
{ 0, "kcmp" },
{ 0, "lchown" },
{ 0, "lchown32" },
{ 0, "madvise" },
{ 0, "mincore" },
{ 0, "mprotect" },
{ 0, "mremap" },
{ 0, "msync" },
{ 0, "name_to_handle_at" },
{ 0, "oldolduname" },
{ 0, "olduname" },
{ 0, "personality" },
{ 0, "preadv2" },
{ 0, "pwritev2" },
{ 0, "readahead" },
{ 0, "readdir" },
{ 0, "remap_file_pages" },
{ 0, "sched_get_priority_max" },
{ 0, "sched_get_priority_min" },
{ 0, "sched_getaffinity" },
{ 0, "sched_getattr" },
{ 0, "sched_getparam" },
{ 0, "sched_getscheduler" },
{ 0, "sched_rr_get_interval" },
{ 0, "sched_yield" },
{ 0, "seccomp" },
{ 0, "sendfile" },
{ 0, "sendfile64" },
{ 0, "setdomainname" },
{ 0, "setfsgid" },
{ 0, "setfsgid32" },
{ 0, "setfsuid" },
{ 0, "setfsuid32" },
{ 0, "sethostname" },
{ 0, "setpgid" },
{ 0, "setsid" },
{ 0, "splice" },
{ 0, "sync" },
{ 0, "sync_file_range" },
{ 0, "syncfs" },
{ 0, "sysinfo" },
{ 0, "tee" },
{ 0, "ugetrlimit" },
{ 0, "umask" },
{ 0, "uname" },
{ 0, "userfaultfd" },
{ 0, "vmsplice" },
/* The following individual syscalls are added depending on specified caps */
{ CAP_SYS_PACCT, "acct" },
{ CAP_SYS_PTRACE, "process_vm_readv" },
{ CAP_SYS_PTRACE, "process_vm_writev" },
{ CAP_SYS_PTRACE, "ptrace" },
{ CAP_SYS_BOOT, "reboot" },
{ CAP_SYSLOG, "syslog" },
{ CAP_SYS_TTY_CONFIG, "vhangup" },
/*
* The following syscalls and groups are knowingly excluded:
*
* @cpu-emulation
* @keyring (NB: keyring is not namespaced!)
* @obsolete
* @swap
*
* bpf (NB: bpffs is not namespaced!)
* fanotify_init
* fanotify_mark
* kexec_file_load
* kexec_load
* lookup_dcookie
* nfsservctl
* open_by_handle_at
* perf_event_open
* pkey_alloc
* pkey_free
* pkey_mprotect
* quotactl
*/
}; };
int r, c = 0; int r, c = 0;
size_t i; size_t i;
char **p; char **p;
for (i = 0; i < ELEMENTSOF(blacklist); i++) { for (i = 0; i < ELEMENTSOF(whitelist); i++) {
if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability))) if (whitelist[i].capability != 0 && (cap_list_retain & (1ULL << whitelist[i].capability)) == 0)
continue; continue;
r = seccomp_add_syscall_filter_item(ctx, blacklist[i].name, SCMP_ACT_ERRNO(EPERM), syscall_whitelist); r = seccomp_add_syscall_filter_item(ctx, whitelist[i].name, SCMP_ACT_ALLOW, syscall_blacklist);
if (r < 0) if (r < 0)
/* If the system call is not known on this architecture, then that's fine, let's ignore it */ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", blacklist[i].name); log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", whitelist[i].name, seccomp_arch_to_string(arch));
else else
c++; c++;
} }
STRV_FOREACH(p, syscall_blacklist) { STRV_FOREACH(p, syscall_whitelist) {
r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ERRNO(EPERM), syscall_whitelist); r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_blacklist);
if (r < 0) if (r < 0)
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", *p); log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", *p, seccomp_arch_to_string(arch));
else else
c++; c++;
} }
@ -106,18 +213,33 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
SECCOMP_FOREACH_LOCAL_ARCH(arch) { SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
int n;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); log_debug("Applying whitelist on architecture: %s", seccomp_arch_to_string(arch));
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(EPERM));
if (r < 0)
return log_error_errno(r, "Failed to allocate seccomp object: %m");
r = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
if (r < 0)
return r;
r = seccomp_load(seccomp);
if (IN_SET(r, -EPERM, -EACCES))
return log_error_errno(r, "Failed to install seccomp filter: %m");
if (r < 0)
log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0) if (r < 0)
return log_error_errno(r, "Failed to allocate seccomp object: %m"); return log_error_errno(r, "Failed to allocate seccomp object: %m");
n = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
if (n < 0)
return n;
/* /*
Audit is broken in containers, much of the userspace audit hookup will fail if running inside a Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
container. We don't care and just turn off creation of audit sockets. container. We don't care and just turn off creation of audit sockets.
@ -133,13 +255,10 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
2, 2,
SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
if (r < 0) if (r < 0) {
log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m"); log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
else
n++;
if (n <= 0) /* no rule added? then skip this architecture */
continue; continue;
}
r = seccomp_load(seccomp); r = seccomp_load(seccomp);
if (IN_SET(r, -EPERM, -EACCES)) if (IN_SET(r, -EPERM, -EACCES))

View file

@ -278,11 +278,19 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"execve\0" "execve\0"
"exit\0" "exit\0"
"exit_group\0" "exit_group\0"
"futex\0"
"get_robust_list\0"
"get_thread_area\0"
"getrlimit\0" /* make sure processes can query stack size and such */ "getrlimit\0" /* make sure processes can query stack size and such */
"gettimeofday\0" "gettimeofday\0"
"membarrier\0"
"nanosleep\0" "nanosleep\0"
"pause\0" "pause\0"
"restart_syscall\0"
"rt_sigreturn\0" "rt_sigreturn\0"
"set_robust_list\0"
"set_thread_area\0"
"set_tid_address\0"
"sigreturn\0" "sigreturn\0"
"time\0" "time\0"
}, },
@ -290,10 +298,11 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.name = "@basic-io", .name = "@basic-io",
.help = "Basic IO", .help = "Basic IO",
.value = .value =
"_llseek\0"
"close\0" "close\0"
"dup\0"
"dup2\0" "dup2\0"
"dup3\0" "dup3\0"
"dup\0"
"lseek\0" "lseek\0"
"pread64\0" "pread64\0"
"preadv\0" "preadv\0"
@ -324,6 +333,32 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"vm86\0" "vm86\0"
"vm86old\0" "vm86old\0"
}, },
[SYSCALL_FILTER_SET_CREDENTIALS] = {
.name = "@credentials",
.help = "Query own process credentials",
.value =
"capget\0"
"getegid\0"
"getegid32\0"
"geteuid\0"
"geteuid32\0"
"getgid\0"
"getgid32\0"
"getgroups\0"
"getgroups32\0"
"getpgid\0"
"getpgrp\0"
"getpid\0"
"getppid\0"
"getresgid\0"
"getresgid32\0"
"getresuid\0"
"getresuid32\0"
"getsid\0"
"gettid\0"
"getuid\0"
"getuid32\0"
},
[SYSCALL_FILTER_SET_DEBUG] = { [SYSCALL_FILTER_SET_DEBUG] = {
.name = "@debug", .name = "@debug",
.help = "Debugging, performance monitoring and tracing functionality", .help = "Debugging, performance monitoring and tracing functionality",
@ -353,24 +388,26 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"fchdir\0" "fchdir\0"
"fchmod\0" "fchmod\0"
"fchmodat\0" "fchmodat\0"
"fcntl64\0"
"fcntl\0" "fcntl\0"
"fcntl64\0"
"fgetxattr\0" "fgetxattr\0"
"flistxattr\0" "flistxattr\0"
"fremovexattr\0"
"fsetxattr\0" "fsetxattr\0"
"fstat64\0"
"fstat\0" "fstat\0"
"fstat64\0"
"fstatat64\0" "fstatat64\0"
"fstatfs64\0"
"fstatfs\0" "fstatfs\0"
"ftruncate64\0" "fstatfs64\0"
"ftruncate\0" "ftruncate\0"
"ftruncate64\0"
"futimesat\0" "futimesat\0"
"getcwd\0" "getcwd\0"
"getdents64\0"
"getdents\0" "getdents\0"
"getdents64\0"
"getxattr\0" "getxattr\0"
"inotify_add_watch\0" "inotify_add_watch\0"
"inotify_init\0"
"inotify_init1\0" "inotify_init1\0"
"inotify_rm_watch\0" "inotify_rm_watch\0"
"lgetxattr\0" "lgetxattr\0"
@ -380,36 +417,43 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"llistxattr\0" "llistxattr\0"
"lremovexattr\0" "lremovexattr\0"
"lsetxattr\0" "lsetxattr\0"
"lstat64\0"
"lstat\0" "lstat\0"
"lstat64\0"
"mkdir\0" "mkdir\0"
"mkdirat\0" "mkdirat\0"
"mknod\0" "mknod\0"
"mknodat\0" "mknodat\0"
"mmap2\0"
"mmap\0" "mmap\0"
"mmap2\0"
"munmap\0" "munmap\0"
"newfstatat\0" "newfstatat\0"
"oldfstat\0"
"oldlstat\0"
"oldstat\0"
"open\0" "open\0"
"openat\0" "openat\0"
"readlink\0" "readlink\0"
"readlinkat\0" "readlinkat\0"
"removexattr\0" "removexattr\0"
"rename\0" "rename\0"
"renameat2\0"
"renameat\0" "renameat\0"
"renameat2\0"
"rmdir\0" "rmdir\0"
"setxattr\0" "setxattr\0"
"stat64\0"
"stat\0" "stat\0"
"stat64\0"
"statfs\0" "statfs\0"
"statfs64\0"
#ifdef __PNR_statx
"statx\0" "statx\0"
#endif
"symlink\0" "symlink\0"
"symlinkat\0" "symlinkat\0"
"truncate64\0"
"truncate\0" "truncate\0"
"truncate64\0"
"unlink\0" "unlink\0"
"unlinkat\0" "unlinkat\0"
"utime\0"
"utimensat\0" "utimensat\0"
"utimes\0" "utimes\0"
}, },
@ -418,15 +462,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.help = "Event loop system calls", .help = "Event loop system calls",
.value = .value =
"_newselect\0" "_newselect\0"
"epoll_create1\0"
"epoll_create\0" "epoll_create\0"
"epoll_create1\0"
"epoll_ctl\0" "epoll_ctl\0"
"epoll_ctl_old\0" "epoll_ctl_old\0"
"epoll_pwait\0" "epoll_pwait\0"
"epoll_wait\0" "epoll_wait\0"
"epoll_wait_old\0" "epoll_wait_old\0"
"eventfd2\0"
"eventfd\0" "eventfd\0"
"eventfd2\0"
"poll\0" "poll\0"
"ppoll\0" "ppoll\0"
"pselect6\0" "pselect6\0"
@ -448,8 +492,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"msgget\0" "msgget\0"
"msgrcv\0" "msgrcv\0"
"msgsnd\0" "msgsnd\0"
"pipe2\0"
"pipe\0" "pipe\0"
"pipe2\0"
"process_vm_readv\0" "process_vm_readv\0"
"process_vm_writev\0" "process_vm_writev\0"
"semctl\0" "semctl\0"
@ -469,6 +513,16 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"keyctl\0" "keyctl\0"
"request_key\0" "request_key\0"
}, },
[SYSCALL_FILTER_SET_MEMLOCK] = {
.name = "@memlock",
.help = "Memory locking control",
.value =
"mlock\0"
"mlock2\0"
"mlockall\0"
"munlock\0"
"munlockall\0"
},
[SYSCALL_FILTER_SET_MODULE] = { [SYSCALL_FILTER_SET_MODULE] = {
.name = "@module", .name = "@module",
.help = "Loading and unloading of kernel modules", .help = "Loading and unloading of kernel modules",
@ -484,15 +538,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"chroot\0" "chroot\0"
"mount\0" "mount\0"
"pivot_root\0" "pivot_root\0"
"umount2\0"
"umount\0" "umount\0"
"umount2\0"
}, },
[SYSCALL_FILTER_SET_NETWORK_IO] = { [SYSCALL_FILTER_SET_NETWORK_IO] = {
.name = "@network-io", .name = "@network-io",
.help = "Network or Unix socket IO, should not be needed if not network facing", .help = "Network or Unix socket IO, should not be needed if not network facing",
.value = .value =
"accept4\0"
"accept\0" "accept\0"
"accept4\0"
"bind\0" "bind\0"
"connect\0" "connect\0"
"getpeername\0" "getpeername\0"
@ -527,6 +581,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"get_kernel_syms\0" "get_kernel_syms\0"
"getpmsg\0" "getpmsg\0"
"gtty\0" "gtty\0"
"idle\0"
"lock\0" "lock\0"
"mpx\0" "mpx\0"
"prof\0" "prof\0"
@ -551,38 +606,38 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"@clock\0" "@clock\0"
"@module\0" "@module\0"
"@raw-io\0" "@raw-io\0"
"_sysctl\0"
"acct\0" "acct\0"
"bpf\0" "bpf\0"
"capset\0" "capset\0"
"chown32\0"
"chown\0" "chown\0"
"chown32\0"
"chroot\0" "chroot\0"
"fchown32\0"
"fchown\0" "fchown\0"
"fchown32\0"
"fchownat\0" "fchownat\0"
"kexec_file_load\0" "kexec_file_load\0"
"kexec_load\0" "kexec_load\0"
"lchown32\0"
"lchown\0" "lchown\0"
"lchown32\0"
"nfsservctl\0" "nfsservctl\0"
"pivot_root\0" "pivot_root\0"
"quotactl\0" "quotactl\0"
"reboot\0" "reboot\0"
"setdomainname\0" "setdomainname\0"
"setfsuid32\0"
"setfsuid\0" "setfsuid\0"
"setgroups32\0" "setfsuid32\0"
"setgroups\0" "setgroups\0"
"setgroups32\0"
"sethostname\0" "sethostname\0"
"setresuid32\0"
"setresuid\0" "setresuid\0"
"setreuid32\0" "setresuid32\0"
"setreuid\0" "setreuid\0"
"setuid32\0" "setreuid32\0"
"setuid\0" "setuid\0"
"setuid32\0"
"swapoff\0" "swapoff\0"
"swapon\0" "swapon\0"
"_sysctl\0"
"vhangup\0" "vhangup\0"
}, },
[SYSCALL_FILTER_SET_PROCESS] = { [SYSCALL_FILTER_SET_PROCESS] = {
@ -593,13 +648,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"clone\0" "clone\0"
"execveat\0" "execveat\0"
"fork\0" "fork\0"
"getpid\0"
"getppid\0"
"getrusage\0"
"gettid\0"
"kill\0" "kill\0"
"prctl\0" "prctl\0"
"rt_sigqueueinfo\0"
"rt_tgsigqueueinfo\0"
"setns\0" "setns\0"
"tgkill\0" "tgkill\0"
"times\0"
"tkill\0" "tkill\0"
"unshare\0" "unshare\0"
"vfork\0" "vfork\0"
"wait4\0"
"waitid\0"
"waitpid\0"
}, },
[SYSCALL_FILTER_SET_RAW_IO] = { [SYSCALL_FILTER_SET_RAW_IO] = {
.name = "@raw-io", .name = "@raw-io",
@ -629,36 +694,56 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.name = "@resources", .name = "@resources",
.help = "Alter resource settings", .help = "Alter resource settings",
.value = .value =
"sched_setparam\0" "ioprio_set\0"
"sched_setscheduler\0" "mbind\0"
"sched_setaffinity\0"
"setpriority\0"
"setrlimit\0"
"set_mempolicy\0"
"migrate_pages\0" "migrate_pages\0"
"move_pages\0" "move_pages\0"
"mbind\0" "nice\0"
"sched_setattr\0"
"prlimit64\0" "prlimit64\0"
"sched_setaffinity\0"
"sched_setattr\0"
"sched_setparam\0"
"sched_setscheduler\0"
"set_mempolicy\0"
"setpriority\0"
"setrlimit\0"
}, },
[SYSCALL_FILTER_SET_SETUID] = { [SYSCALL_FILTER_SET_SETUID] = {
.name = "@setuid", .name = "@setuid",
.help = "Operations for changing user/group credentials", .help = "Operations for changing user/group credentials",
.value = .value =
"setgid32\0"
"setgid\0" "setgid\0"
"setgroups32\0" "setgid32\0"
"setgroups\0" "setgroups\0"
"setregid32\0" "setgroups32\0"
"setregid\0" "setregid\0"
"setresgid32\0" "setregid32\0"
"setresgid\0" "setresgid\0"
"setresuid32\0" "setresgid32\0"
"setresuid\0" "setresuid\0"
"setreuid32\0" "setresuid32\0"
"setreuid\0" "setreuid\0"
"setuid32\0" "setreuid32\0"
"setuid\0" "setuid\0"
"setuid32\0"
},
[SYSCALL_FILTER_SET_SIGNAL] = {
.name = "@signal",
.help = "Process signal handling",
.value =
"rt_sigaction\0"
"rt_sigpending\0"
"rt_sigprocmask\0"
"rt_sigsuspend\0"
"rt_sigtimedwait\0"
"sigaction\0"
"sigaltstack\0"
"signal\0"
"signalfd\0"
"signalfd4\0"
"sigpending\0"
"sigprocmask\0"
"sigsuspend\0"
}, },
[SYSCALL_FILTER_SET_SWAP] = { [SYSCALL_FILTER_SET_SWAP] = {
.name = "@swap", .name = "@swap",
@ -667,6 +752,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"swapoff\0" "swapoff\0"
"swapon\0" "swapon\0"
}, },
[SYSCALL_FILTER_SET_TIMER] = {
.name = "@timer",
.help = "Schedule operations by time",
.value =
"alarm\0"
"getitimer\0"
"setitimer\0"
"timer_create\0"
"timer_delete\0"
"timer_getoverrun\0"
"timer_gettime\0"
"timer_settime\0"
"timerfd_create\0"
"timerfd_gettime\0"
"timerfd_settime\0"
"times\0"
},
}; };
const SyscallFilterSet *syscall_filter_set_find(const char *name) { const SyscallFilterSet *syscall_filter_set_find(const char *name) {
@ -697,8 +799,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
const SyscallFilterSet *other; const SyscallFilterSet *other;
other = syscall_filter_set_find(name); other = syscall_filter_set_find(name);
if (!other) if (!other) {
log_debug("Filter set %s is not known!", name);
return -EINVAL; return -EINVAL;
}
r = seccomp_add_syscall_filter_set(seccomp, other, action, exclude); r = seccomp_add_syscall_filter_set(seccomp, other, action, exclude);
if (r < 0) if (r < 0)
@ -707,8 +811,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
int id; int id;
id = seccomp_syscall_resolve_name(name); id = seccomp_syscall_resolve_name(name);
if (id == __NR_SCMP_ERROR) if (id == __NR_SCMP_ERROR) {
log_debug("System call %s is not known!", name);
return -EINVAL; /* Not known at all? Then that's a real error */ return -EINVAL; /* Not known at all? Then that's a real error */
}
r = seccomp_rule_add_exact(seccomp, action, id, 0); r = seccomp_rule_add_exact(seccomp, action, id, 0);
if (r < 0) if (r < 0)

View file

@ -44,11 +44,13 @@ enum {
SYSCALL_FILTER_SET_BASIC_IO, SYSCALL_FILTER_SET_BASIC_IO,
SYSCALL_FILTER_SET_CLOCK, SYSCALL_FILTER_SET_CLOCK,
SYSCALL_FILTER_SET_CPU_EMULATION, SYSCALL_FILTER_SET_CPU_EMULATION,
SYSCALL_FILTER_SET_CREDENTIALS,
SYSCALL_FILTER_SET_DEBUG, SYSCALL_FILTER_SET_DEBUG,
SYSCALL_FILTER_SET_FILE_SYSTEM, SYSCALL_FILTER_SET_FILE_SYSTEM,
SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_IO_EVENT,
SYSCALL_FILTER_SET_IPC, SYSCALL_FILTER_SET_IPC,
SYSCALL_FILTER_SET_KEYRING, SYSCALL_FILTER_SET_KEYRING,
SYSCALL_FILTER_SET_MEMLOCK,
SYSCALL_FILTER_SET_MODULE, SYSCALL_FILTER_SET_MODULE,
SYSCALL_FILTER_SET_MOUNT, SYSCALL_FILTER_SET_MOUNT,
SYSCALL_FILTER_SET_NETWORK_IO, SYSCALL_FILTER_SET_NETWORK_IO,
@ -59,7 +61,9 @@ enum {
SYSCALL_FILTER_SET_REBOOT, SYSCALL_FILTER_SET_REBOOT,
SYSCALL_FILTER_SET_RESOURCES, SYSCALL_FILTER_SET_RESOURCES,
SYSCALL_FILTER_SET_SETUID, SYSCALL_FILTER_SET_SETUID,
SYSCALL_FILTER_SET_SIGNAL,
SYSCALL_FILTER_SET_SWAP, SYSCALL_FILTER_SET_SWAP,
SYSCALL_FILTER_SET_TIMER,
_SYSCALL_FILTER_SET_MAX _SYSCALL_FILTER_SET_MAX
}; };

View file

@ -612,6 +612,36 @@ static void test_lock_personality(void) {
assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS); assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
} }
/* Sanity-check the syscall_filter_sets[] table:
 *   - "@default" is the very first entry,
 *   - every entry carries a non-empty help string,
 *   - entry names are in strictly ascending strcmp() order (the first entry is exempt),
 *   - within each entry's member list, "@group" references precede plain names and each of the two
 *     classes is itself in strictly ascending strcmp() order.
 * Any violation aborts via assert_se(). */
static void test_filter_sets_ordered(void) {
        size_t i;

        /* Ensure "@default" always remains at the beginning of the list */
        assert_se(SYSCALL_FILTER_SET_DEFAULT == 0);
        assert_se(streq(syscall_filter_sets[0].name, "@default"));

        for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
                const char *k, *p = NULL;

                /* Make sure each group has a description. This has to look at entry i: indexing entry 0
                 * here would only ever validate "@default" and let every other set go unchecked. */
                assert_se(!isempty(syscall_filter_sets[i].help));

                /* Make sure the groups are ordered alphabetically, except for the first entry */
                assert_se(i < 2 || strcmp(syscall_filter_sets[i-1].name, syscall_filter_sets[i].name) < 0);

                NULSTR_FOREACH(k, syscall_filter_sets[i].value) {

                        /* Ensure each syscall list is in itself ordered, but groups before names.
                         * p is the previously visited member (NULL for the first one), k the current
                         * one. A pair is accepted if it moves from a group to a plain name, or if both
                         * are of the same class and p sorts strictly before k. */
                        assert_se(!p ||
                                  (*p == '@' && *k != '@') ||
                                  (((*p == '@' && *k == '@') ||
                                    (*p != '@' && *k != '@')) &&
                                   strcmp(p, k) < 0));

                        p = k;
                }
        }
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
log_set_max_level(LOG_DEBUG); log_set_max_level(LOG_DEBUG);
@ -629,6 +659,7 @@ int main(int argc, char *argv[]) {
test_restrict_archs(); test_restrict_archs();
test_load_syscall_filter_set_raw(); test_load_syscall_filter_set_raw();
test_lock_personality(); test_lock_personality();
test_filter_sets_ordered();
return 0; return 0;
} }