Merge pull request #6818 from poettering/nspawn-whitelist

convert nspawn syscall blacklist into a whitelist (and related stuff)
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2017-09-14 19:47:59 +02:00 committed by GitHub
commit 8b5c528ce8
7 changed files with 359 additions and 83 deletions

View File

@ -723,9 +723,9 @@
system calls will be permitted. The list may optionally be prefixed by <literal>~</literal>, in which case all
listed system calls are prohibited. If this command line option is used multiple times the configured lists are
combined. If both a positive and a negative list (that is one system call list without and one with the
<literal>~</literal> prefix) are configured, the positive list takes precedence over the negative list. Note
that <command>systemd-nspawn</command> always implements a system call blacklist (as opposed to a whitelist),
and this command line option hence adds or removes entries from the default blacklist, depending on the
<literal>~</literal> prefix) are configured, the negative list takes precedence over the positive list. Note
that <command>systemd-nspawn</command> always implements a system call whitelist (as opposed to a blacklist),
and this command line option hence adds or removes entries from the default whitelist, depending on the
<literal>~</literal> prefix. Note that the applied system call filter is also altered implicitly if additional
capabilities are passed using the <option>--capability=</option> option.</para></listitem>
</varlistentry>

View File

@ -239,7 +239,7 @@
<command>systemd-run</command> command itself. This allows <command>systemd-run</command>
to be used within shell pipelines.
Note that this mode is not suitable for interactive command shells and similar, as the
service process will become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead
service process will not become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead
in that case.</para>
<para>When both <option>--pipe</option> and <option>--pty</option> are used in combination the more appropriate

View File

@ -1485,6 +1485,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@cpu-emulation</entry>
<entry>System calls for CPU emulation functionality (<citerefentry project='man-pages'><refentrytitle>vm86</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row>
<row>
<entry>@credentials</entry>
<entry>System calls for querying process credentials (<citerefentry project='man-pages'><refentrytitle>getuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>capget</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
</row>
<row>
<entry>@debug</entry>
<entry>Debugging, performance monitoring and tracing functionality (<citerefentry project='man-pages'><refentrytitle>ptrace</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>perf_event_open</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
@ -1505,6 +1509,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@keyring</entry>
<entry>Kernel keyring access (<citerefentry project='man-pages'><refentrytitle>keyctl</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row>
<row>
<entry>@memlock</entry>
<entry>Locking of memory into RAM (<citerefentry project='man-pages'><refentrytitle>mlock</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>mlockall</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
</row>
<row>
<entry>@module</entry>
<entry>Loading and unloading of kernel modules (<citerefentry project='man-pages'><refentrytitle>init_module</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>delete_module</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
@ -1545,10 +1553,18 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<entry>@setuid</entry>
<entry>System calls for changing user ID and group ID credentials (<citerefentry project='man-pages'><refentrytitle>setuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setgid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setresuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
<row>
<entry>@signal</entry>
<entry>System calls for manipulating and handling process signals (<citerefentry project='man-pages'><refentrytitle>signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>sigprocmask</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
<row>
<entry>@swap</entry>
<entry>System calls for enabling/disabling swap devices (<citerefentry project='man-pages'><refentrytitle>swapon</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>swapoff</refentrytitle><manvolnum>2</manvolnum></citerefentry>)</entry>
</row>
<row>
<entry>@timer</entry>
<entry>System calls for scheduling operations by time (<citerefentry project='man-pages'><refentrytitle>alarm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>timer_create</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
</tbody>
</tgroup>
</table>

View File

@ -47,47 +47,154 @@ static int seccomp_add_default_syscall_filter(
static const struct {
uint64_t capability;
const char* name;
} blacklist[] = {
{ 0, "@obsolete" },
{ 0, "@keyring" }, /* keyring is not namespaced */
{ 0, "bpf" },
{ 0, "kexec_file_load" },
{ 0, "kexec_load" },
{ 0, "lookup_dcookie" },
{ 0, "open_by_handle_at" },
{ 0, "perf_event_open" },
{ 0, "quotactl" },
{ 0, "@swap" },
{ CAP_SYSLOG, "syslog" },
{ CAP_SYS_MODULE, "@module" },
{ CAP_SYS_PACCT, "acct" },
{ CAP_SYS_PTRACE, "process_vm_readv" },
{ CAP_SYS_PTRACE, "process_vm_writev" },
{ CAP_SYS_PTRACE, "ptrace" },
{ CAP_SYS_RAWIO, "@raw-io" },
{ CAP_SYS_TIME, "@clock" },
} whitelist[] = {
/* Let's use set names where we can */
{ 0, "@basic-io" },
{ 0, "@credentials" },
{ 0, "@default" },
{ 0, "@file-system" },
{ 0, "@io-event" },
{ 0, "@ipc" },
{ 0, "@mount" },
{ 0, "@network-io" },
{ 0, "@process" },
{ 0, "@resources" },
{ 0, "@setuid" },
{ 0, "@signal" },
{ 0, "@timer" },
/* The following four are sets we optionally enable, in case the caps have been configured for it */
{ CAP_SYS_TIME, "@clock" },
{ CAP_SYS_MODULE, "@module" },
{ CAP_SYS_RAWIO, "@raw-io" },
{ CAP_IPC_LOCK, "@memlock" },
/* Plus a good set of additional syscalls which are not part of any of the groups above */
{ 0, "brk" },
{ 0, "capset" },
{ 0, "chown" },
{ 0, "chown32" },
{ 0, "copy_file_range" },
{ 0, "fadvise64" },
{ 0, "fadvise64_64" },
{ 0, "fchown" },
{ 0, "fchown32" },
{ 0, "fchownat" },
{ 0, "fdatasync" },
{ 0, "flock" },
{ 0, "fsync" },
{ 0, "get_mempolicy" },
{ 0, "getcpu" },
{ 0, "getpriority" },
{ 0, "getrandom" },
{ 0, "io_cancel" },
{ 0, "io_destroy" },
{ 0, "io_getevents" },
{ 0, "io_setup" },
{ 0, "io_submit" },
{ 0, "ioctl" },
{ 0, "ioprio_get" },
{ 0, "kcmp" },
{ 0, "lchown" },
{ 0, "lchown32" },
{ 0, "madvise" },
{ 0, "mincore" },
{ 0, "mprotect" },
{ 0, "mremap" },
{ 0, "msync" },
{ 0, "name_to_handle_at" },
{ 0, "oldolduname" },
{ 0, "olduname" },
{ 0, "personality" },
{ 0, "preadv2" },
{ 0, "pwritev2" },
{ 0, "readahead" },
{ 0, "readdir" },
{ 0, "remap_file_pages" },
{ 0, "sched_get_priority_max" },
{ 0, "sched_get_priority_min" },
{ 0, "sched_getaffinity" },
{ 0, "sched_getattr" },
{ 0, "sched_getparam" },
{ 0, "sched_getscheduler" },
{ 0, "sched_rr_get_interval" },
{ 0, "sched_yield" },
{ 0, "seccomp" },
{ 0, "sendfile" },
{ 0, "sendfile64" },
{ 0, "setdomainname" },
{ 0, "setfsgid" },
{ 0, "setfsgid32" },
{ 0, "setfsuid" },
{ 0, "setfsuid32" },
{ 0, "sethostname" },
{ 0, "setpgid" },
{ 0, "setsid" },
{ 0, "splice" },
{ 0, "sync" },
{ 0, "sync_file_range" },
{ 0, "syncfs" },
{ 0, "sysinfo" },
{ 0, "tee" },
{ 0, "ugetrlimit" },
{ 0, "umask" },
{ 0, "uname" },
{ 0, "userfaultfd" },
{ 0, "vmsplice" },
/* The following individual syscalls are added depending on specified caps */
{ CAP_SYS_PACCT, "acct" },
{ CAP_SYS_PTRACE, "process_vm_readv" },
{ CAP_SYS_PTRACE, "process_vm_writev" },
{ CAP_SYS_PTRACE, "ptrace" },
{ CAP_SYS_BOOT, "reboot" },
{ CAP_SYSLOG, "syslog" },
{ CAP_SYS_TTY_CONFIG, "vhangup" },
/*
* The following syscalls and groups are knowingly excluded:
*
* @cpu-emulation
* @keyring (NB: keyring is not namespaced!)
* @obsolete
* @swap
*
* bpf (NB: bpffs is not namespaced!)
* fanotify_init
* fanotify_mark
* kexec_file_load
* kexec_load
* lookup_dcookie
* nfsservctl
* open_by_handle_at
* perf_event_open
* pkey_alloc
* pkey_free
* pkey_mprotect
* quotactl
*/
};
int r, c = 0;
size_t i;
char **p;
for (i = 0; i < ELEMENTSOF(blacklist); i++) {
if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability)))
for (i = 0; i < ELEMENTSOF(whitelist); i++) {
if (whitelist[i].capability != 0 && (cap_list_retain & (1ULL << whitelist[i].capability)) == 0)
continue;
r = seccomp_add_syscall_filter_item(ctx, blacklist[i].name, SCMP_ACT_ERRNO(EPERM), syscall_whitelist);
r = seccomp_add_syscall_filter_item(ctx, whitelist[i].name, SCMP_ACT_ALLOW, syscall_blacklist);
if (r < 0)
/* If the system call is not known on this architecture, then that's fine, let's ignore it */
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", blacklist[i].name);
log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", whitelist[i].name, seccomp_arch_to_string(arch));
else
c++;
}
STRV_FOREACH(p, syscall_blacklist) {
r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ERRNO(EPERM), syscall_whitelist);
STRV_FOREACH(p, syscall_whitelist) {
r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_blacklist);
if (r < 0)
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", *p);
log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", *p, seccomp_arch_to_string(arch));
else
c++;
}
@ -106,18 +213,33 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
int n;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
log_debug("Applying whitelist on architecture: %s", seccomp_arch_to_string(arch));
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(EPERM));
if (r < 0)
return log_error_errno(r, "Failed to allocate seccomp object: %m");
r = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
if (r < 0)
return r;
r = seccomp_load(seccomp);
if (IN_SET(r, -EPERM, -EACCES))
return log_error_errno(r, "Failed to install seccomp filter: %m");
if (r < 0)
log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
}
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return log_error_errno(r, "Failed to allocate seccomp object: %m");
n = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
if (n < 0)
return n;
/*
Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
container. We don't care and just turn off creation of audit sockets.
@ -133,13 +255,10 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
2,
SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
if (r < 0)
if (r < 0) {
log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
else
n++;
if (n <= 0) /* no rule added? then skip this architecture */
continue;
}
r = seccomp_load(seccomp);
if (IN_SET(r, -EPERM, -EACCES))

View File

@ -278,11 +278,19 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"execve\0"
"exit\0"
"exit_group\0"
"futex\0"
"get_robust_list\0"
"get_thread_area\0"
"getrlimit\0" /* make sure processes can query stack size and such */
"gettimeofday\0"
"membarrier\0"
"nanosleep\0"
"pause\0"
"restart_syscall\0"
"rt_sigreturn\0"
"set_robust_list\0"
"set_thread_area\0"
"set_tid_address\0"
"sigreturn\0"
"time\0"
},
@ -290,10 +298,11 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.name = "@basic-io",
.help = "Basic IO",
.value =
"_llseek\0"
"close\0"
"dup\0"
"dup2\0"
"dup3\0"
"dup\0"
"lseek\0"
"pread64\0"
"preadv\0"
@ -324,6 +333,32 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"vm86\0"
"vm86old\0"
},
[SYSCALL_FILTER_SET_CREDENTIALS] = {
.name = "@credentials",
.help = "Query own process credentials",
.value =
"capget\0"
"getegid\0"
"getegid32\0"
"geteuid\0"
"geteuid32\0"
"getgid\0"
"getgid32\0"
"getgroups\0"
"getgroups32\0"
"getpgid\0"
"getpgrp\0"
"getpid\0"
"getppid\0"
"getresgid\0"
"getresgid32\0"
"getresuid\0"
"getresuid32\0"
"getsid\0"
"gettid\0"
"getuid\0"
"getuid32\0"
},
[SYSCALL_FILTER_SET_DEBUG] = {
.name = "@debug",
.help = "Debugging, performance monitoring and tracing functionality",
@ -353,24 +388,26 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"fchdir\0"
"fchmod\0"
"fchmodat\0"
"fcntl64\0"
"fcntl\0"
"fcntl64\0"
"fgetxattr\0"
"flistxattr\0"
"fremovexattr\0"
"fsetxattr\0"
"fstat64\0"
"fstat\0"
"fstat64\0"
"fstatat64\0"
"fstatfs64\0"
"fstatfs\0"
"ftruncate64\0"
"fstatfs64\0"
"ftruncate\0"
"ftruncate64\0"
"futimesat\0"
"getcwd\0"
"getdents64\0"
"getdents\0"
"getdents64\0"
"getxattr\0"
"inotify_add_watch\0"
"inotify_init\0"
"inotify_init1\0"
"inotify_rm_watch\0"
"lgetxattr\0"
@ -380,36 +417,43 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"llistxattr\0"
"lremovexattr\0"
"lsetxattr\0"
"lstat64\0"
"lstat\0"
"lstat64\0"
"mkdir\0"
"mkdirat\0"
"mknod\0"
"mknodat\0"
"mmap2\0"
"mmap\0"
"mmap2\0"
"munmap\0"
"newfstatat\0"
"oldfstat\0"
"oldlstat\0"
"oldstat\0"
"open\0"
"openat\0"
"readlink\0"
"readlinkat\0"
"removexattr\0"
"rename\0"
"renameat2\0"
"renameat\0"
"renameat2\0"
"rmdir\0"
"setxattr\0"
"stat64\0"
"stat\0"
"stat64\0"
"statfs\0"
"statfs64\0"
#ifdef __PNR_statx
"statx\0"
#endif
"symlink\0"
"symlinkat\0"
"truncate64\0"
"truncate\0"
"truncate64\0"
"unlink\0"
"unlinkat\0"
"utime\0"
"utimensat\0"
"utimes\0"
},
@ -418,15 +462,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.help = "Event loop system calls",
.value =
"_newselect\0"
"epoll_create1\0"
"epoll_create\0"
"epoll_create1\0"
"epoll_ctl\0"
"epoll_ctl_old\0"
"epoll_pwait\0"
"epoll_wait\0"
"epoll_wait_old\0"
"eventfd2\0"
"eventfd\0"
"eventfd2\0"
"poll\0"
"ppoll\0"
"pselect6\0"
@ -448,8 +492,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"msgget\0"
"msgrcv\0"
"msgsnd\0"
"pipe2\0"
"pipe\0"
"pipe2\0"
"process_vm_readv\0"
"process_vm_writev\0"
"semctl\0"
@ -469,6 +513,16 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"keyctl\0"
"request_key\0"
},
[SYSCALL_FILTER_SET_MEMLOCK] = {
.name = "@memlock",
.help = "Memory locking control",
.value =
"mlock\0"
"mlock2\0"
"mlockall\0"
"munlock\0"
"munlockall\0"
},
[SYSCALL_FILTER_SET_MODULE] = {
.name = "@module",
.help = "Loading and unloading of kernel modules",
@ -484,15 +538,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"chroot\0"
"mount\0"
"pivot_root\0"
"umount2\0"
"umount\0"
"umount2\0"
},
[SYSCALL_FILTER_SET_NETWORK_IO] = {
.name = "@network-io",
.help = "Network or Unix socket IO, should not be needed if not network facing",
.value =
"accept4\0"
"accept\0"
"accept4\0"
"bind\0"
"connect\0"
"getpeername\0"
@ -527,6 +581,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"get_kernel_syms\0"
"getpmsg\0"
"gtty\0"
"idle\0"
"lock\0"
"mpx\0"
"prof\0"
@ -551,38 +606,38 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"@clock\0"
"@module\0"
"@raw-io\0"
"_sysctl\0"
"acct\0"
"bpf\0"
"capset\0"
"chown32\0"
"chown\0"
"chown32\0"
"chroot\0"
"fchown32\0"
"fchown\0"
"fchown32\0"
"fchownat\0"
"kexec_file_load\0"
"kexec_load\0"
"lchown32\0"
"lchown\0"
"lchown32\0"
"nfsservctl\0"
"pivot_root\0"
"quotactl\0"
"reboot\0"
"setdomainname\0"
"setfsuid32\0"
"setfsuid\0"
"setgroups32\0"
"setfsuid32\0"
"setgroups\0"
"setgroups32\0"
"sethostname\0"
"setresuid32\0"
"setresuid\0"
"setreuid32\0"
"setresuid32\0"
"setreuid\0"
"setuid32\0"
"setreuid32\0"
"setuid\0"
"setuid32\0"
"swapoff\0"
"swapon\0"
"_sysctl\0"
"vhangup\0"
},
[SYSCALL_FILTER_SET_PROCESS] = {
@ -593,13 +648,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"clone\0"
"execveat\0"
"fork\0"
"getpid\0"
"getppid\0"
"getrusage\0"
"gettid\0"
"kill\0"
"prctl\0"
"rt_sigqueueinfo\0"
"rt_tgsigqueueinfo\0"
"setns\0"
"tgkill\0"
"times\0"
"tkill\0"
"unshare\0"
"vfork\0"
"wait4\0"
"waitid\0"
"waitpid\0"
},
[SYSCALL_FILTER_SET_RAW_IO] = {
.name = "@raw-io",
@ -629,36 +694,56 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.name = "@resources",
.help = "Alter resource settings",
.value =
"sched_setparam\0"
"sched_setscheduler\0"
"sched_setaffinity\0"
"setpriority\0"
"setrlimit\0"
"set_mempolicy\0"
"ioprio_set\0"
"mbind\0"
"migrate_pages\0"
"move_pages\0"
"mbind\0"
"sched_setattr\0"
"nice\0"
"prlimit64\0"
"sched_setaffinity\0"
"sched_setattr\0"
"sched_setparam\0"
"sched_setscheduler\0"
"set_mempolicy\0"
"setpriority\0"
"setrlimit\0"
},
[SYSCALL_FILTER_SET_SETUID] = {
.name = "@setuid",
.help = "Operations for changing user/group credentials",
.value =
"setgid32\0"
"setgid\0"
"setgroups32\0"
"setgid32\0"
"setgroups\0"
"setregid32\0"
"setgroups32\0"
"setregid\0"
"setresgid32\0"
"setregid32\0"
"setresgid\0"
"setresuid32\0"
"setresgid32\0"
"setresuid\0"
"setreuid32\0"
"setresuid32\0"
"setreuid\0"
"setuid32\0"
"setreuid32\0"
"setuid\0"
"setuid32\0"
},
[SYSCALL_FILTER_SET_SIGNAL] = {
.name = "@signal",
.help = "Process signal handling",
.value =
"rt_sigaction\0"
"rt_sigpending\0"
"rt_sigprocmask\0"
"rt_sigsuspend\0"
"rt_sigtimedwait\0"
"sigaction\0"
"sigaltstack\0"
"signal\0"
"signalfd\0"
"signalfd4\0"
"sigpending\0"
"sigprocmask\0"
"sigsuspend\0"
},
[SYSCALL_FILTER_SET_SWAP] = {
.name = "@swap",
@ -667,6 +752,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"swapoff\0"
"swapon\0"
},
[SYSCALL_FILTER_SET_TIMER] = {
.name = "@timer",
.help = "Schedule operations by time",
.value =
"alarm\0"
"getitimer\0"
"setitimer\0"
"timer_create\0"
"timer_delete\0"
"timer_getoverrun\0"
"timer_gettime\0"
"timer_settime\0"
"timerfd_create\0"
"timerfd_gettime\0"
"timerfd_settime\0"
"times\0"
},
};
const SyscallFilterSet *syscall_filter_set_find(const char *name) {
@ -697,8 +799,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
const SyscallFilterSet *other;
other = syscall_filter_set_find(name);
if (!other)
if (!other) {
log_debug("Filter set %s is not known!", name);
return -EINVAL;
}
r = seccomp_add_syscall_filter_set(seccomp, other, action, exclude);
if (r < 0)
@ -707,8 +811,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
int id;
id = seccomp_syscall_resolve_name(name);
if (id == __NR_SCMP_ERROR)
if (id == __NR_SCMP_ERROR) {
log_debug("System call %s is not known!", name);
return -EINVAL; /* Not known at all? Then that's a real error */
}
r = seccomp_rule_add_exact(seccomp, action, id, 0);
if (r < 0)

View File

@ -44,11 +44,13 @@ enum {
SYSCALL_FILTER_SET_BASIC_IO,
SYSCALL_FILTER_SET_CLOCK,
SYSCALL_FILTER_SET_CPU_EMULATION,
SYSCALL_FILTER_SET_CREDENTIALS,
SYSCALL_FILTER_SET_DEBUG,
SYSCALL_FILTER_SET_FILE_SYSTEM,
SYSCALL_FILTER_SET_IO_EVENT,
SYSCALL_FILTER_SET_IPC,
SYSCALL_FILTER_SET_KEYRING,
SYSCALL_FILTER_SET_MEMLOCK,
SYSCALL_FILTER_SET_MODULE,
SYSCALL_FILTER_SET_MOUNT,
SYSCALL_FILTER_SET_NETWORK_IO,
@ -59,7 +61,9 @@ enum {
SYSCALL_FILTER_SET_REBOOT,
SYSCALL_FILTER_SET_RESOURCES,
SYSCALL_FILTER_SET_SETUID,
SYSCALL_FILTER_SET_SIGNAL,
SYSCALL_FILTER_SET_SWAP,
SYSCALL_FILTER_SET_TIMER,
_SYSCALL_FILTER_SET_MAX
};

View File

@ -612,6 +612,36 @@ static void test_lock_personality(void) {
assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
}
/* Sanity-checks the static syscall_filter_sets[] table:
 *
 *   - "@default" is the very first entry,
 *   - every group carries a non-empty help text,
 *   - groups are sorted by name in strictly ascending strcmp() order (the leading "@default" is exempt),
 *   - inside each group's NUL-separated value list, "@group" references come before plain system call
 *     names, and each of the two sub-lists is strictly ascending.
 *
 * Any violation trips assert_se(). */
static void test_filter_sets_ordered(void) {
        size_t i;

        /* Ensure "@default" always remains at the beginning of the list */
        assert_se(SYSCALL_FILTER_SET_DEFAULT == 0);
        assert_se(streq(syscall_filter_sets[0].name, "@default"));

        for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
                const char *k, *p = NULL;

                /* Make sure each group has a description. This must index by i: checking entry 0 over and
                 * over would only ever validate "@default". */
                assert_se(!isempty(syscall_filter_sets[i].help));

                /* Make sure the groups are ordered alphabetically, except for the first entry. Starting the
                 * comparison at i == 2 keeps "@default" (index 0) out of every pair. */
                assert_se(i < 2 || strcmp(syscall_filter_sets[i-1].name, syscall_filter_sets[i].name) < 0);

                NULSTR_FOREACH(k, syscall_filter_sets[i].value) {

                        /* Ensure each syscall list is in itself ordered, but groups before names: p is the
                         * previous item, k the current one. Either we step from a group to a plain name, or
                         * both are of the same kind and strictly ascending. */
                        assert_se(!p ||
                                  (*p == '@' && *k != '@') ||
                                  (((*p == '@' && *k == '@') ||
                                    (*p != '@' && *k != '@')) &&
                                   strcmp(p, k) < 0));

                        p = k;
                }
        }
}
int main(int argc, char *argv[]) {
log_set_max_level(LOG_DEBUG);
@ -629,6 +659,7 @@ int main(int argc, char *argv[]) {
test_restrict_archs();
test_load_syscall_filter_set_raw();
test_lock_personality();
test_filter_sets_ordered();
return 0;
}