Merge pull request #5893 from keszybz/memorydenywriteexecute

Add support for more arches for MemoryDenyWriteExecute
This commit is contained in:
Lennart Poettering 2017-05-11 19:42:42 +02:00 committed by GitHub
commit 271312e37b
4 changed files with 150 additions and 134 deletions

View file

@ -792,43 +792,10 @@ int seccomp_restrict_namespaces(unsigned long retain) {
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
int clone_reversed_order = -1;
unsigned i;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
switch (arch) {
case SCMP_ARCH_X86_64:
case SCMP_ARCH_X86:
case SCMP_ARCH_X32:
case SCMP_ARCH_PPC64:
case SCMP_ARCH_PPC64LE:
case SCMP_ARCH_MIPS:
case SCMP_ARCH_MIPSEL:
case SCMP_ARCH_MIPS64:
case SCMP_ARCH_MIPSEL64:
case SCMP_ARCH_MIPS64N32:
case SCMP_ARCH_MIPSEL64N32:
clone_reversed_order = 0;
break;
case SCMP_ARCH_S390:
case SCMP_ARCH_S390X:
/* On s390/s390x the first two parameters to clone are switched */
clone_reversed_order = 1;
break;
/* Please add more definitions here, if you port systemd to other architectures! */
#if SECCOMP_RESTRICT_NAMESPACES_BROKEN
# warning "Consider adding the right clone() syscall definitions here!"
#endif
}
if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
continue;
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
@ -877,7 +844,8 @@ int seccomp_restrict_namespaces(unsigned long retain) {
break;
}
if (clone_reversed_order == 0)
/* On s390/s390x the first two parameters to clone are switched */
if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
@ -972,16 +940,16 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
case SCMP_ARCH_X32:
case SCMP_ARCH_ARM:
case SCMP_ARCH_AARCH64:
case SCMP_ARCH_PPC64:
case SCMP_ARCH_PPC64LE:
/* These we know we support (i.e. are the ones that do not use socketcall()) */
supported = true;
break;
case SCMP_ARCH_X86:
case SCMP_ARCH_S390:
case SCMP_ARCH_S390X:
case SCMP_ARCH_PPC:
case SCMP_ARCH_PPC64:
case SCMP_ARCH_PPC64LE:
case SCMP_ARCH_X86:
default:
/* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
* don't know */
@ -1192,6 +1160,37 @@ int seccomp_restrict_realtime(void) {
return 0;
}
/* Adds one "fail with EPERM" rule for syscall 'nr' to the filter context 'seccomp'.
 *
 * 'arg_cnt' and 'arg' are handed through unchanged to seccomp_rule_add_exact(). 'arch' is only
 * used to produce a readable debug message if the rule cannot be added.
 *
 * Returns the result of seccomp_rule_add_exact(); a negative value means the rule was not added
 * (this has already been logged at debug level) and the caller decides how to proceed. */
static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
                                      uint32_t arch,
                                      int nr,
                                      unsigned int arg_cnt,
                                      const struct scmp_arg_cmp arg) {
        _cleanup_free_ char *syscall_name = NULL;
        int r;

        r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
        if (r >= 0)
                return r;

        /* Resolve the number back to a name purely for the log message; strna() copes with a
         * failed lookup. */
        syscall_name = seccomp_syscall_resolve_num_arch(arch, nr);
        log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
                        strna(syscall_name),
                        seccomp_arch_to_string(arch));

        return r;
}
/* For known architectures, check that syscalls are indeed defined or not.
 *
 * These are compile-time checks of the per-architecture assumptions about the SysV shared memory
 * syscalls: a positive SCMP_SYS() value is required where the syscall is expected to be defined,
 * a negative one where it is expected not to be. Architectures not listed here are not checked. */
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
/* Expected to be defined on these architectures. */
assert_cc(SCMP_SYS(shmget) > 0);
assert_cc(SCMP_SYS(shmat) > 0);
assert_cc(SCMP_SYS(shmdt) > 0);
#elif defined(__i386__) || defined(__powerpc64__)
/* Expected not to be defined on these architectures. */
assert_cc(SCMP_SYS(shmget) < 0);
assert_cc(SCMP_SYS(shmat) < 0);
assert_cc(SCMP_SYS(shmdt) < 0);
#endif
int seccomp_memory_deny_write_execute(void) {
uint32_t arch;
@ -1208,21 +1207,36 @@ int seccomp_memory_deny_write_execute(void) {
case SCMP_ARCH_X86:
filter_syscall = SCMP_SYS(mmap2);
block_syscall = SCMP_SYS(mmap);
break;
/* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We
* ignore that here, which means there's still a way to get writable/executable memory, if an
* IPC key is mapped like this on i386. That's a pity, but no total loss. */
case SCMP_ARCH_PPC64:
case SCMP_ARCH_PPC64LE:
filter_syscall = SCMP_SYS(mmap);
/* Note that shmat() isn't available, and the call is multiplexed through ipc().
* We ignore that here, which means there's still a way to get writable/executable
* memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
break;
case SCMP_ARCH_AARCH64:
block_syscall = SCMP_SYS(mmap);
/* fall through */
case SCMP_ARCH_ARM:
filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
shmat_syscall = SCMP_SYS(shmat);
break;
case SCMP_ARCH_X86_64:
case SCMP_ARCH_X32:
filter_syscall = SCMP_SYS(mmap);
filter_syscall = SCMP_SYS(mmap); /* amd64 and x32 have only mmap */
shmat_syscall = SCMP_SYS(shmat);
break;
/* Please add more definitions here, if you port systemd to other architectures! */
#if !defined(__i386__) && !defined(__x86_64__)
#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__)
#warning "Consider adding the right mmap() syscall definitions here!"
#endif
}
@ -1235,63 +1249,30 @@ int seccomp_memory_deny_write_execute(void) {
if (r < 0)
return r;
if (filter_syscall != 0) {
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
filter_syscall,
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
if (r < 0) {
_cleanup_free_ char *n = NULL;
n = seccomp_syscall_resolve_num_arch(arch, filter_syscall);
log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
strna(n),
seccomp_arch_to_string(arch));
continue;
}
}
r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
if (r < 0)
continue;
if (block_syscall != 0) {
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
block_syscall,
0);
if (r < 0) {
_cleanup_free_ char *n = NULL;
n = seccomp_syscall_resolve_num_arch(arch, block_syscall);
log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
strna(n),
seccomp_arch_to_string(arch));
r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
if (r < 0)
continue;
}
}
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(mprotect),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
if (r < 0) {
log_debug_errno(r, "Failed to add mprotect() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
if (r < 0)
continue;
}
if (shmat_syscall != 0) {
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(shmat),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
if (r < 0) {
log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
if (r < 0)
continue;
}
}
r = seccomp_load(seccomp);

View file

@ -76,28 +76,6 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist);
int seccomp_restrict_realtime(void);
int seccomp_memory_deny_write_execute(void);
#if defined(__i386__) || defined(__s390x__) || defined(__s390__) || defined(__powerpc64__) || defined(__powerpc__) || defined (__mips__)
/* On these archs, socket() is implemented via the socketcall() syscall multiplexer, and we can't restrict it hence via
* seccomp */
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
#else
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
#endif
/* mmap() blocking is only available on some archs for now */
#if defined(__x86_64__) || defined(__i386__)
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 0
#else
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
#endif
/* we don't know the right order of the clone() parameters except for these archs, for now */
#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__) || defined(__powerpc64__) || defined(__mips__)
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0
#else
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1
#endif
extern const uint32_t seccomp_local_archs[];
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \

View file

@ -24,7 +24,7 @@
#include "string-util.h"
#include "util.h"
_unused_ \
_unused_
static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
#include "af-from-name.h"

View file

@ -21,8 +21,10 @@
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/poll.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <unistd.h>
#include "alloc-util.h"
#include "fd-util.h"
@ -37,6 +39,15 @@
#include "util.h"
#include "virt.h"
/* Test-local switch: 1 if restricting socket() by address family is not expected to work on the
 * build architecture, 0 otherwise. The address family tests use it to choose between expecting
 * socket() to succeed and expecting it to fail with EAFNOSUPPORT.
 * NOTE(review): the "SCMP_SYS(socket) < 0" term presumably catches architectures for which
 * libseccomp reports no native socket() syscall number; confirm against libseccomp. */
#if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
/* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
 * and we can't restrict it hence via seccomp. */
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
#else
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
#endif
static void test_seccomp_arch_to_string(void) {
uint32_t a, b;
const char *name;
@ -158,8 +169,6 @@ static void test_restrict_namespace(void) {
assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
if (!is_seccomp_available())
return;
if (geteuid() != 0)
@ -218,7 +227,6 @@ static void test_restrict_namespace(void) {
}
assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
#endif
}
static void test_protect_sysctl(void) {
@ -286,12 +294,12 @@ static void test_restrict_address_families(void) {
assert_se(fd >= 0);
safe_close(fd);
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
assert_se(fd >= 0);
safe_close(fd);
#else
assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
assert_se(fd < 0);
assert_se(errno == EAFNOSUPPORT);
#endif
@ -309,19 +317,21 @@ static void test_restrict_address_families(void) {
assert_se(fd >= 0);
safe_close(fd);
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
assert_se(fd >= 0);
safe_close(fd);
fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
assert_se(fd >= 0);
safe_close(fd);
#else
assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
assert_se(fd < 0);
assert_se(errno == EAFNOSUPPORT);
#endif
assert_se(socket(AF_NETLINK, SOCK_DGRAM, 0) < 0);
fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
assert_se(fd >= 0);
safe_close(fd);
#else
assert_se(fd < 0);
assert_se(errno == EAFNOSUPPORT);
#endif
@ -369,7 +379,7 @@ static void test_restrict_realtime(void) {
assert_se(wait_for_terminate_and_warn("realtimeseccomp", pid, true) == EXIT_SUCCESS);
}
static void test_memory_deny_write_execute(void) {
static void test_memory_deny_write_execute_mmap(void) {
pid_t pid;
if (!is_seccomp_available())
@ -393,14 +403,13 @@ static void test_memory_deny_write_execute(void) {
assert_se(seccomp_memory_deny_write_execute() >= 0);
#if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
assert_se(p != MAP_FAILED);
assert_se(munmap(p, page_size()) >= 0);
#else
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
assert_se(p == MAP_FAILED);
assert_se(errno == EPERM);
#else /* unknown architectures */
assert_se(p != MAP_FAILED);
assert_se(munmap(p, page_size()) >= 0);
#endif
p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
@ -410,7 +419,54 @@ static void test_memory_deny_write_execute(void) {
_exit(EXIT_SUCCESS);
}
assert_se(wait_for_terminate_and_warn("memoryseccomp", pid, true) == EXIT_SUCCESS);
assert_se(wait_for_terminate_and_warn("memoryseccomp-mmap", pid, true) == EXIT_SUCCESS);
}
/* Checks the effect of seccomp_memory_deny_write_execute() on shmat().
 *
 * A private SysV shared memory segment is created, then a forked child verifies that:
 *   - before the filter is installed, attaching works both with and without SHM_EXEC;
 *   - after the filter is installed, attaching with SHM_EXEC fails with EPERM on x86-64, arm and
 *     aarch64, and still succeeds on all other architectures;
 *   - attaching without SHM_EXEC keeps working in either case.
 *
 * The test is skipped if seccomp is unavailable or if not running as root. */
static void test_memory_deny_write_execute_shmat(void) {
        int shmid, r;
        pid_t pid;

        if (!is_seccomp_available())
                return;
        if (geteuid() != 0)
                return;

        shmid = shmget(IPC_PRIVATE, page_size(), 0);
        assert_se(shmid >= 0);

        pid = fork();
        assert_se(pid >= 0);

        if (pid == 0) {
                void *p;

                /* Sanity check: without the filter both kinds of attach must work. */
                p = shmat(shmid, NULL, 0);
                assert_se(p != MAP_FAILED);
                assert_se(shmdt(p) == 0);

                p = shmat(shmid, NULL, SHM_EXEC);
                assert_se(p != MAP_FAILED);
                assert_se(shmdt(p) == 0);

                assert_se(seccomp_memory_deny_write_execute() >= 0);

                p = shmat(shmid, NULL, SHM_EXEC);
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
                assert_se(p == MAP_FAILED);
                assert_se(errno == EPERM);
#else /* __i386__, __powerpc64__, and "unknown" architectures */
                assert_se(p != MAP_FAILED);
                assert_se(shmdt(p) == 0);
#endif

                /* A non-executable attach must never be affected by the filter. */
                p = shmat(shmid, NULL, 0);
                assert_se(p != MAP_FAILED);
                assert_se(shmdt(p) == 0);

                _exit(EXIT_SUCCESS);
        }

        r = wait_for_terminate_and_warn("memoryseccomp-shmat", pid, true);

        /* SysV segments outlive the processes that created them, so remove ours explicitly. This
         * happens before the child's result is asserted, so that a failing child does not leave
         * the segment behind either. The filter was installed in the child only, hence shmctl()
         * is unrestricted here. */
        assert_se(shmctl(shmid, IPC_RMID, NULL) == 0);

        assert_se(r == EXIT_SUCCESS);
}
static void test_restrict_archs(void) {
@ -509,7 +565,8 @@ int main(int argc, char *argv[]) {
test_protect_sysctl();
test_restrict_address_families();
test_restrict_realtime();
test_memory_deny_write_execute();
test_memory_deny_write_execute_mmap();
test_memory_deny_write_execute_shmat();
test_restrict_archs();
test_load_syscall_filter_set_raw();