2020-11-09 05:23:58 +01:00
|
|
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
2014-02-13 00:24:00 +01:00
|
|
|
#pragma once
|
|
|
|
|
2014-12-12 02:35:39 +01:00
|
|
|
#include <seccomp.h>
|
2016-10-21 21:48:10 +02:00
|
|
|
#include <stdbool.h>
|
2015-12-03 21:13:37 +01:00
|
|
|
#include <stdint.h>
|
2014-02-13 00:24:00 +01:00
|
|
|
|
2020-08-05 15:31:26 +02:00
|
|
|
#include "errno-list.h"
|
|
|
|
#include "parse-util.h"
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
#include "set.h"
|
2020-08-05 15:31:26 +02:00
|
|
|
#include "string-util.h"
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
|
2014-02-13 00:24:00 +01:00
|
|
|
/* Returns a string for the seccomp architecture value c.
 * NOTE(review): the result for unrecognized values is not visible from this header — confirm. */
const char* seccomp_arch_to_string(uint32_t c);
|
|
|
|
/* Converts the string n to a seccomp architecture value, handing the result back through *ret.
 * NOTE(review): presumably returns 0 on success and a negative errno-style value otherwise — confirm. */
int seccomp_arch_from_string(const char *n, uint32_t *ret);
|
2014-02-18 22:14:00 +01:00
|
|
|
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
/* NOTE(review): presumably sets up a filter context for the single architecture arch with the given
 * default action and hands it back through *ret; return convention looks errno-style — confirm. */
int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action);
|
2016-06-01 11:56:01 +02:00
|
|
|
|
2016-08-22 21:40:58 +02:00
|
|
|
/* Reports whether seccomp can be used.
 * NOTE(review): how availability is probed, and whether the result is cached, is not visible here. */
bool is_seccomp_available(void);
|
|
|
|
|
2016-10-21 21:50:05 +02:00
|
|
|
/* Describes one entry of the syscall_filter_sets[] table: three constant strings.
 * NOTE(review): the field meanings below are inferred from the field names only — confirm against the
 * table definition. */
typedef struct SyscallFilterSet {
        const char *name;  /* identifier of the set; presumably the key used by syscall_filter_set_find() */
        const char *help;  /* presumably a short human-readable description */
        const char *value; /* contents of the set; the encoding is not visible from this header */
} SyscallFilterSet;
|
|
|
|
|
|
|
|
/* Identifiers for the predefined syscall filter sets. The numeric values follow declaration order, so
 * entries must never be reordered casually.
 * NOTE(review): presumably these are the indices into syscall_filter_sets[] — confirm. */
enum {
        /* Please leave DEFAULT first and KNOWN last, but sort the rest alphabetically */
        SYSCALL_FILTER_SET_DEFAULT,
        SYSCALL_FILTER_SET_AIO,
        SYSCALL_FILTER_SET_BASIC_IO,
        SYSCALL_FILTER_SET_CHOWN,
        SYSCALL_FILTER_SET_CLOCK,
        SYSCALL_FILTER_SET_CPU_EMULATION,
        SYSCALL_FILTER_SET_DEBUG,
        SYSCALL_FILTER_SET_FILE_SYSTEM,
        SYSCALL_FILTER_SET_IO_EVENT,
        SYSCALL_FILTER_SET_IPC,
        SYSCALL_FILTER_SET_KEYRING,
        SYSCALL_FILTER_SET_MEMLOCK,
        SYSCALL_FILTER_SET_MODULE,
        SYSCALL_FILTER_SET_MOUNT,
        SYSCALL_FILTER_SET_NETWORK_IO,
        SYSCALL_FILTER_SET_OBSOLETE,
        SYSCALL_FILTER_SET_PKEY,
        SYSCALL_FILTER_SET_PRIVILEGED,
        SYSCALL_FILTER_SET_PROCESS,
        SYSCALL_FILTER_SET_RAW_IO,
        SYSCALL_FILTER_SET_REBOOT,
        SYSCALL_FILTER_SET_RESOURCES,
        SYSCALL_FILTER_SET_SETUID,
        SYSCALL_FILTER_SET_SIGNAL,
        SYSCALL_FILTER_SET_SWAP,
        SYSCALL_FILTER_SET_SYNC,
        SYSCALL_FILTER_SET_SYSTEM_SERVICE,
        SYSCALL_FILTER_SET_TIMER,
        SYSCALL_FILTER_SET_KNOWN,
        _SYSCALL_FILTER_SET_MAX /* number of sets above; not a set itself */
};
|
|
|
|
|
|
|
|
/* Table of syscall filter set descriptors; the definition lives outside this header.
 * NOTE(review): presumably indexed by the SYSCALL_FILTER_SET_* constants — confirm. */
extern const SyscallFilterSet syscall_filter_sets[];
|
|
|
|
|
|
|
|
/* Looks up a filter set descriptor by name.
 * NOTE(review): presumably returns NULL when no set matches — confirm. */
const SyscallFilterSet *syscall_filter_set_find(const char *name);
|
|
|
|
|
2017-11-11 13:35:49 +01:00
|
|
|
/* NOTE(review): presumably applies the members of set to the hashmap s, with b selecting between
 * adding and removing them; return convention looks errno-style — confirm both. */
int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set);
|
2017-08-09 16:09:04 +02:00
|
|
|
|
2020-08-21 17:21:04 +02:00
|
|
|
int seccomp_add_syscall_filter_item(
|
|
|
|
scmp_filter_ctx *ctx,
|
|
|
|
const char *name,
|
|
|
|
uint32_t action,
|
|
|
|
char **exclude,
|
|
|
|
bool log_missing,
|
|
|
|
char ***added);
|
2017-09-10 19:10:29 +02:00
|
|
|
|
2018-09-24 16:59:12 +02:00
|
|
|
/* NOTE(review): presumably builds and loads a filter in which the members of set get action and
 * everything else gets default_action; log_missing looks like a logging switch — confirm. */
int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing);
|
|
|
|
/* Variant of seccomp_load_syscall_filter_set() that takes the set as a Hashmap instead of a
 * SyscallFilterSet. NOTE(review): the key/value layout of the hashmap is not visible here — confirm. */
int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing);
|
2016-11-02 03:25:19 +01:00
|
|
|
|
2018-02-26 12:51:35 +01:00
|
|
|
/* Bit flags accepted by seccomp_parse_syscall_filter(); each flag occupies its own bit so that they can
 * be OR-ed together. NOTE(review): the effect of each flag is implemented elsewhere — consult the
 * parser before relying on the names alone. */
typedef enum SeccompParseFlags {
        SECCOMP_PARSE_INVERT     = 1 << 0,
        SECCOMP_PARSE_ALLOW_LIST = 1 << 1,
        SECCOMP_PARSE_LOG        = 1 << 2,
        SECCOMP_PARSE_PERMISSIVE = 1 << 3,
} SeccompParseFlags;
|
|
|
|
|
2019-04-03 09:17:42 +02:00
|
|
|
int seccomp_parse_syscall_filter(
|
|
|
|
const char *name,
|
|
|
|
int errno_num,
|
|
|
|
Hashmap *filter,
|
|
|
|
SeccompParseFlags flags,
|
|
|
|
const char *unit,
|
|
|
|
const char *filename, unsigned line);
|
2017-12-23 10:45:32 +01:00
|
|
|
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
/* NOTE(review): presumably installs a filter restricting system call architectures based on the set
 * archs; whether archs lists permitted or forbidden architectures is not visible here — confirm. */
int seccomp_restrict_archs(Set *archs);
|
2016-11-02 03:25:19 +01:00
|
|
|
/* NOTE(review): presumably installs a filter restricting namespace operations, with retain being a
 * bit mask of namespace types that stay usable — confirm the encoding of retain. */
int seccomp_restrict_namespaces(unsigned long retain);
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
/* NOTE(review): presumably installs a filter blocking sysctl-related system calls; return convention
 * looks errno-style — confirm. */
int seccomp_protect_sysctl(void);
|
2019-11-05 02:17:01 +01:00
|
|
|
/* NOTE(review): presumably installs a filter blocking access to the kernel log via system calls;
 * return convention looks errno-style — confirm. */
int seccomp_protect_syslog(void);
|
2020-06-23 08:31:16 +02:00
|
|
|
/* NOTE(review): presumably installs a filter on socket address families, treating address_families as
 * an allow list when allow_list is true and as a deny list otherwise — confirm. */
int seccomp_restrict_address_families(Set *address_families, bool allow_list);
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
/* NOTE(review): presumably installs a filter preventing the use of realtime scheduling; return
 * convention looks errno-style — confirm. */
int seccomp_restrict_realtime(void);
|
|
|
|
/* NOTE(review): presumably installs a filter refusing memory mappings that are both writable and
 * executable; return convention looks errno-style — confirm. */
int seccomp_memory_deny_write_execute(void);
|
2017-07-04 14:48:18 +02:00
|
|
|
/* NOTE(review): presumably installs a filter that pins the execution domain to the given personality
 * value; return convention looks errno-style — confirm. */
int seccomp_lock_personality(unsigned long personality);
|
2019-02-08 18:25:00 +01:00
|
|
|
/* NOTE(review): presumably installs a filter blocking changes to the host name; return convention
 * looks errno-style — confirm. */
int seccomp_protect_hostname(void);
|
2019-03-20 19:00:28 +01:00
|
|
|
/* NOTE(review): presumably installs a filter preventing creation of set-user-ID / set-group-ID files;
 * return convention looks errno-style — confirm. */
int seccomp_restrict_suid_sgid(void);
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
|
2020-10-29 00:51:30 +01:00
|
|
|
/* Array of architecture values walked by SECCOMP_FOREACH_LOCAL_ARCH(); the definition lives outside
 * this header. The array is terminated by a SECCOMP_LOCAL_ARCH_END entry, and entries equal to
 * SECCOMP_LOCAL_ARCH_BLOCKED are skipped during iteration. */
extern uint32_t seccomp_local_archs[];

/* Terminator entry of seccomp_local_archs[]. */
#define SECCOMP_LOCAL_ARCH_END UINT32_MAX

/* Note: 0 is safe to use here because although SCMP_ARCH_NATIVE is 0, it would
 * never be in the seccomp_local_archs array anyway so we can use it as a
 * marker. */
#define SECCOMP_LOCAL_ARCH_BLOCKED 0
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
|
|
|
|
/* Iterates over seccomp_local_archs[]: (arch) is assigned each entry in turn until the
 * SECCOMP_LOCAL_ARCH_END terminator is reached, and the statement following the macro runs only for
 * entries that are not SECCOMP_LOCAL_ARCH_BLOCKED. (arch) must be an assignable lvalue and is evaluated
 * several times; after the loop it holds SECCOMP_LOCAL_ARCH_END.
 *
 * The filter is spelled "if (blocked) {} else" rather than a bare "if (!blocked)" so that the caller's
 * statement becomes the else-branch: an "else" that a caller writes after the loop body can then no
 * longer silently attach to the "if" hidden inside this macro.
 *
 * Relies on the GNU statement-expression extension to initialize (arch) and the index together. */
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
        for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; });   \
             (arch) != SECCOMP_LOCAL_ARCH_END;                          \
             (arch) = seccomp_local_archs[++_i])                        \
                if ((arch) == SECCOMP_LOCAL_ARCH_BLOCKED) {} else
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
|
2019-04-11 01:08:41 +02:00
|
|
|
/* Evaluates to true if r, taken as an errno-style value of either sign, is one of the errors treated as
 * fatal for seccomp:
 *
 * EPERM:  treated as fatal as well (NOTE(review): no rationale was recorded for this one)
 * EACCES: the caller has neither CAP_SYS_ADMIN nor no_new_privs == 1
 * ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant
 * EFAULT: addresses passed as args (by libseccomp) are invalid */
#define ERRNO_IS_SECCOMP_FATAL(r)                                       \
        IN_SET(abs(r), EPERM, EACCES, ENOMEM, EFAULT)
|
|
|
|
|
seccomp: rework seccomp code, to improve compat with some archs
This substantially reworks the seccomp code, to ensure better
compatibility with some architectures, including i386.
So far we relied on libseccomp's internal handling of the multiple
syscall ABIs supported on Linux. This is problematic however, as it does
not define clear semantics if an ABI is not able to support specific
seccomp rules we install.
This rework hence changes a couple of things:
- We no longer use seccomp_rule_add(), but only
seccomp_rule_add_exact(), and fail the installation of a filter if the
architecture doesn't support it.
- We no longer rely on adding multiple syscall architectures to a single filter,
but instead install a separate filter for each syscall architecture
supported. This way, we can install a strict filter for x86-64, while
permitting a less strict filter for i386.
- All high-level filter additions are now moved from execute.c to
seccomp-util.c, so that we can test them independently of the service
execution logic.
- Tests have been added for all types of our seccomp filters.
- SystemCallFilters= and SystemCallArchitectures= are now implemented in
independent filters and installation logic, as they semantically are
very much independent of each other.
Fixes: #4575
2016-12-27 15:28:25 +01:00
|
|
|
/* Instantiates a cleanup helper for scmp_filter_ctx based on seccomp_release().
 * NOTE(review): the name and exact shape of the generated function are determined by
 * DEFINE_TRIVIAL_CLEANUP_FUNC, which is defined elsewhere — confirm before use. */
DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
|
2017-08-02 06:46:45 +02:00
|
|
|
|
2020-06-05 15:12:29 +02:00
|
|
|
/* NOTE(review): presumably converts the string list l into a set of architecture values handed back
 * through *ret_archs; ownership of the returned set and the error convention are not visible here —
 * confirm. */
int parse_syscall_archs(char **l, Set **ret_archs);
|
2019-04-29 11:54:00 +02:00
|
|
|
|
|
|
|
/* Returns a seccomp action value. NOTE(review): presumably the action used to kill the whole process,
 * possibly with a fallback when that is unsupported — confirm against the implementation. */
uint32_t scmp_act_kill_process(void);
|
2020-08-05 15:31:26 +02:00
|
|
|
|
|
|
|
/* This is a special value to be used where syscall filters otherwise expect errno numbers, will be
 * replaced with real seccomp action. */
enum {
        SECCOMP_ERROR_NUMBER_KILL = INT_MAX - 1,
};
|
|
|
|
|
|
|
|
static inline bool seccomp_errno_or_action_is_valid(int n) {
|
|
|
|
return n == SECCOMP_ERROR_NUMBER_KILL || errno_is_valid(n);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int seccomp_parse_errno_or_action(const char *p) {
|
|
|
|
if (streq_ptr(p, "kill"))
|
|
|
|
return SECCOMP_ERROR_NUMBER_KILL;
|
|
|
|
return parse_errno(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline const char *seccomp_errno_or_action_to_string(int num) {
|
|
|
|
if (num == SECCOMP_ERROR_NUMBER_KILL)
|
|
|
|
return "kill";
|
|
|
|
return errno_to_name(num);
|
|
|
|
}
|